def test_transform_boxcox(self):
     """Check that the 'boxcox' strategy round-trips through transform/inverse_transform.

     The input has four columns: two squared standard-Cauchy columns and two
     exponentiated standard-normal columns, so every value is non-negative
     (the Box-Cox strategy only works on positive data).
     """
     squared_cauchy = np.random.standard_cauchy(size=(1000, 2))**2
     log_normal = np.exp(np.random.normal(size=(1000, 2)))
     x = np.hstack([squared_cauchy, log_normal])

     gaussianizer = g.Gaussianize(strategy='boxcox')
     gaussianizer.fit(x)

     # Forward then inverse transform should give back the original data.
     recovered = gaussianizer.inverse_transform(gaussianizer.transform(x))
     assert np.allclose(recovered, x)

     gaussianizer.qqplot(x, 'boxcox')
 def test_transform_lambert(self):
     """Check that a default-constructed Gaussianize round-trips its input.

     The input has four columns: two standard-Cauchy columns and two
     standard-normal columns. The transform is built with no explicit
     strategy (presumably Lambert, given the 'lambert' label passed to
     qqplot -- confirm against Gaussianize's default).
     """
     heavy_tailed = np.random.standard_cauchy(size=(1000, 2))
     gaussian = np.random.normal(size=(1000, 2))
     x = np.hstack([heavy_tailed, gaussian])

     gaussianizer = g.Gaussianize()
     gaussianizer.fit(x)

     # Forward then inverse transform should give back the original data.
     recovered = gaussianizer.inverse_transform(gaussianizer.transform(x))
     assert np.allclose(recovered, x)

     gaussianizer.qqplot(x, 'lambert')
Beispiel #3
0
def to_gaussian(score_list):
    """Gaussianize the given scores with the 'brute' strategy.

    The scores are converted to a NumPy array, a ``g.Gaussianize`` transform
    is fitted to them, and the same data is then transformed.

    Returns:
        The transformed scores collapsed to a one-dimensional array.
    """
    scores = np.array(score_list)
    gaussianizer = g.Gaussianize(strategy='brute')
    gaussianizer.fit(scores)
    return gaussianizer.transform(scores).flatten()
Beispiel #4
0
    def single_process(filename_train,filename_test,filename_save):
        """Train on one RNAcompete file, predict on another and score the result.

        Steps, in order:
          1. Read and shuffle the training and test (name, score) lists.
          2. Fit a 'brute' Gaussianize scaler on the training scores only, apply
             it to the training scores and divide them by 2.5.
          3. Write temporary positives/negatives/test FASTA files under
             ``output_pathname``, train a model and predict the test entries.
          4. Apply the same scaling to the test scores and compute the Pearson
             correlation between predictions and scaled test scores.
          5. Save a scatter plot (``<filename_save>.png``) and the predictions
             (``<filename_save>.pred``), then delete the temporary files.

        ``output_pathname``, ``vmin`` and ``vmax`` are not parameters; they are
        taken from the enclosing scope.

        Args:
            filename_train: path passed to ``read_RNAcompete_data_2`` for training data.
            filename_test: path passed to ``read_RNAcompete_data_2`` for test data.
            filename_save: base name used for all output and temporary files.

        Returns:
            The value returned by ``stats.pearsonr(pred, score_list_test)``.
        """
        name_list_train,score_list_train=read_RNAcompete_data_2(filename_train)
        score_list_train,name_list_train=shuffle_RNAcompete(score_list_train,name_list_train)

        name_list_test,score_list_test=read_RNAcompete_data_2(filename_test)
        score_list_test,name_list_test=shuffle_RNAcompete(score_list_test,name_list_test)

        # The scaler is fitted on the training scores only; the test scores are
        # pushed through the same fitted transform further down.
        preprocess_scaler=gaussianize.Gaussianize(strategy='brute')
        preprocess_scaler.fit(score_list_train)
        score_list_train=preprocess_scaler.transform(score_list_train)
        score_list_train/=2.5

        filename_positives=os.path.join(output_pathname,'tmp_'+filename_save+'.positives.fa')
        filename_negatives=os.path.join(output_pathname,'tmp_'+filename_save+'.negatives.fa')
        generate_RNAcompete_training_data(name_list_train,score_list_train,filename_positives,filename_negatives)

        # Temporary FASTA file for the test set: each entry is written as a
        # "> name" header line followed by the same name as the record body.
        filename_test_tmp=os.path.join(output_pathname,'tmp_'+filename_save+'.test.fa')
        with open(filename_test_tmp,'w') as fout_test:
            for name in name_list_test:
                fout_test.write('> %s\n%s\n' % (name,name))

        # NOTE(review): the prefix handed to training() has '.txt' stripped while
        # the temporary file names above do not -- confirm these agree for every
        # filename_save that callers pass.
        scaler,_model=training(output_pathname,'tmp_'+filename_save.replace('.txt',''))
        pred=predicting2(filename_test_tmp,scaler,_model,None)

        # Bring the test scores onto the same scale as the training targets.
        score_list_test=preprocess_scaler.transform(score_list_test)
        score_list_test/=2.5
        score_list_test=score_list_test.ravel()

        coef=stats.pearsonr(pred,score_list_test)
        # Single-argument print() emits the same text on Python 2 and Python 3.
        print('%s correlation: %s' % (filename_test,coef))

        # Predicted vs. actual scatter with the identity line drawn in red.
        plt.scatter(score_list_test,pred)
        plt.plot([vmin,vmax],[vmin,vmax],'r')
        plt.xlim(vmin,vmax)
        plt.ylim(vmin,vmax)
        plt.savefig(os.path.join(output_pathname,filename_save+'.png'))
        plt.close()

        # One "name prediction" line per test entry.
        filename_pred=os.path.join(output_pathname,filename_save+'.pred')
        with open(filename_pred,'w') as fout_save:
            for i,name in enumerate(name_list_test):
                fout_save.write('%s %s\n' % (name,pred[i]))

        os.remove(filename_positives)
        os.remove(filename_negatives)
        os.remove(filename_test_tmp)

        return coef