def run(self, network, antecedents, out_attributes, user_options,
        num_cores, outfile):
    """Merge two signal files into a single joined signal file.

    ``antecedents`` must unpack into exactly two nodes.  Each node's
    ``identifier`` is used as the path of an input file.  Both paths are
    passed through ``module_utils.convert_to_same_platform`` and the
    resulting pair is merged into ``outfile`` by
    ``module_utils.merge_two_files``.

    Raises:
        AssertionError: if either input path does not exist.
    """
    import os
    from Betsy import module_utils

    merge_node1, merge_node2 = antecedents
    assert os.path.exists(merge_node1.identifier), \
        'File not found: %s' % merge_node1.identifier
    assert os.path.exists(merge_node2.identifier), \
        'File not found: %s' % merge_node2.identifier

    file1, file2 = module_utils.convert_to_same_platform(
        merge_node1.identifier, merge_node2.identifier)
    # The context manager closes the output handle even when the merge
    # raises; open() is used instead of the Python-2-only file() builtin.
    with open(outfile, 'w') as handle:
        module_utils.merge_two_files(file1, file2, handle)
Example #2
0
 def run(self, network, antecedents, out_attributes, user_options,
         num_cores, outfile):
     """Merge two signal files into a single joined signal file.

     ``antecedents`` must unpack into exactly two nodes.  Each node's
     ``identifier`` is used as the path of an input file.  Both paths are
     passed through ``module_utils.convert_to_same_platform`` and the
     resulting pair is merged into ``outfile`` by
     ``module_utils.merge_two_files``.

     Raises:
         AssertionError: if either input path does not exist, or if
             ``filelib.exists_nz(outfile)`` is false after the merge.
     """
     import os
     from genomicode import filelib
     from Betsy import module_utils

     merge_node1, merge_node2 = antecedents
     assert os.path.exists(merge_node1.identifier), (
         'the merge_file1 %s in merge_data does not exist' %
         merge_node1.identifier)
     assert os.path.exists(merge_node2.identifier), (
         'the merge_file2 %s in merge_data does not exist' %
         merge_node2.identifier)

     file1, file2 = module_utils.convert_to_same_platform(
         merge_node1.identifier, merge_node2.identifier)
     # The context manager closes (and flushes) the output handle even when
     # the merge raises; open() replaces the Python-2-only file() builtin.
     with open(outfile, 'w') as handle:
         module_utils.merge_two_files(file1, file2, handle)

     # The handle is closed at this point, so the check sees the final file.
     assert filelib.exists_nz(outfile), (
         'the output file %s for merge_data fails' % outfile)
Example #3
0
 def run(self, network, antecedents, out_attributes, user_options,
         num_cores, outfile):
     """Run the external scoresig script on a pair of input files.

     ``antecedents`` must unpack into exactly two nodes (bound here as
     ``rma_node`` and ``mas5_node``).  Their ``identifier`` paths are
     passed through ``module_utils.convert_to_same_platform`` and handed
     to the script as its ``-r`` and ``-m`` arguments; ``outfile`` is
     passed as ``-o``.  The script location comes from ``config.scoresig``
     and is resolved with ``module_utils.which``.

     Raises:
         AssertionError: if the scoresig script cannot be located.
         ValueError: if the script writes anything to stderr, or exits
             with a non-zero status.
     """
     import subprocess
     from Betsy import module_utils
     from genomicode import config

     rma_node, mas5_node = antecedents
     scoresig_path = config.scoresig
     scoresig_BIN = module_utils.which(scoresig_path)
     assert scoresig_BIN, 'cannot find the %s' % scoresig_path

     file1, file2 = module_utils.convert_to_same_platform(
         rma_node.identifier, mas5_node.identifier)
     command = [
         'python', scoresig_BIN, '-r', file1, '-m', file2, '-j', '20', '-o',
         outfile
     ]
     process = subprocess.Popen(command,
                                shell=False,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
     # communicate() drains both pipes and waits for the child to exit, so
     # returncode is populated afterwards.
     error_message = process.communicate()[1]
     if error_message:
         raise ValueError(error_message)
     # A child that dies without writing to stderr must not be reported as
     # a success.
     if process.returncode != 0:
         raise ValueError(
             '%s exited with status %d' % (scoresig_path, process.returncode))
    def run(
        self, network, antecedents, out_attributes, user_options, num_cores,
        outfile):
        """Run the GenePattern WeightedVoting module and tabulate its output.

        ``antecedents`` must unpack into three nodes: training data, test
        data and training class labels.  The two data files are passed
        through ``module_utils.convert_to_same_platform``; a placeholder
        class file ``temp_test.cls`` (every label ``'0'``) is written to the
        current directory for the test set.  The GenePattern launcher named
        by ``config.genepattern`` is then invoked with its results directed
        to ``./wv_result``.

        Every result file whose name ends in ``pred.odf`` is renamed to
        ``prediction.odf`` and converted into a tab-separated table with
        the columns ``Sample_name``, ``Predicted_class`` and ``Confidence``
        that is written to ``outfile``.

        Recognised ``user_options`` keys: ``wv_num_features`` and
        ``wv_minstd``.  ``out_attributes['wv_feature_stat']`` selects the
        feature-selection statistic; its position in the list of allowed
        names is what gets sent to GenePattern.

        Raises:
            AssertionError: on an invalid option value, a missing
                GenePattern launcher, a missing result directory, a
                ``stderr.txt`` in the results, an unexpected ODF header, or
                when ``filelib.exists_nz(outfile)`` is false at the end.
            ValueError: if the launcher writes anything to stderr.
        """
        import os
        import subprocess
        import arrayio
        from genomicode import filelib
        from Betsy import read_label_file
        from Betsy import module_utils
        from genomicode import config

        data_node_train, data_node_test, cls_node_train = antecedents
        module_name = 'WeightedVoting'
        test_cls_filename = 'temp_test.cls'
        gp_parameters = dict()

        file1, file2 = module_utils.convert_to_same_platform(
            data_node_train.identifier, data_node_test.identifier)
        # Only the class names of the training labels are reused; the other
        # two returned values are not needed here.
        _, _, class_name = read_label_file.read(cls_node_train.identifier)
        M = arrayio.read(data_node_test.identifier)
        # One placeholder label per entry along the second dimension of the
        # test matrix (presumably one per sample -- confirm against arrayio).
        label_line = ['0'] * M.dim()[1]
        read_label_file.write(test_cls_filename, class_name, label_line)

        gp_parameters['train.filename'] = file1
        gp_parameters['train.class.filename'] = cls_node_train.identifier
        gp_parameters['test.filename'] = file2
        gp_parameters['test.class.filename'] = test_cls_filename
        if 'wv_num_features' in user_options:
            gp_parameters['num.features'] = str(user_options['wv_num_features'])

        if 'wv_minstd' in user_options:
            assert module_utils.is_number(
                user_options['wv_minstd']), 'the wv_minstd should be number'
            gp_parameters['min.std'] = str(user_options['wv_minstd'])

        wv_feature_stat = ['wv_snr', 'wv_ttest', 'wv_snr_median',
                           'wv_ttest_median', 'wv_snr_minstd', 'wv_ttest_minstd',
                           'wv_snr_median_minstd', 'wv_ttest_median_minstd']

        assert out_attributes['wv_feature_stat'] in wv_feature_stat, (
            'the wv_feature_stat is invalid'
        )
        # The statistic is communicated by its index in the list above.
        gp_parameters['feature.selection.statistic'] = str(
            wv_feature_stat.index(out_attributes['wv_feature_stat']))

        gp_path = config.genepattern
        gp_module = module_utils.which(gp_path)
        assert gp_module, 'cannot find the %s' % gp_path

        download_directory = os.path.join(".", 'wv_result')
        command = [gp_module, module_name, '-o', download_directory]
        for key, value in gp_parameters.items():
            command.extend(['--parameters', key + ':' + value])

        process = subprocess.Popen(command,
                                   shell=False,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        # communicate() both drains the pipes and waits for the child.
        # Calling wait() first can deadlock once the child fills a pipe
        # buffer that nobody is reading.
        error_message = process.communicate()[1]
        if error_message:
            raise ValueError(error_message)

        assert os.path.exists(download_directory), (
            'there is no output directory for weightedVoting'
        )
        result_files = os.listdir(download_directory)
        assert 'stderr.txt' not in result_files, 'gene_pattern get error'

        for result_name in result_files:
            if not result_name.endswith('pred.odf'):
                continue
            # Join with the directory exactly once; the same path is used
            # for both reading and renaming.
            pred_path = os.path.join(download_directory, result_name)
            with open(pred_path, 'r') as handle:
                text = handle.readlines()
            os.rename(pred_path,
                      os.path.join(download_directory, 'prediction.odf'))

            # The second line carries the header-line count; data rows are
            # taken from index ``start + 2`` onwards.
            assert text[1][0:12] == 'HeaderLines='
            start = int(text[1][12:-1])

            newresult = [['Sample_name', 'Predicted_class', 'Confidence']]
            for raw_line in text[start + 2:]:
                fields = raw_line.split()
                if not fields:
                    # Skip blank lines (e.g. a trailing newline).
                    continue
                n = len(fields)
                # The sample name may contain spaces: it is everything
                # before the last four fields.  The fourth-from-last and
                # the last field are not copied to the output.
                newresult.append(
                    [' '.join(fields[0:n - 4]), fields[n - 3], fields[n - 2]])

            with open(outfile, 'w') as handle:
                for row in newresult:
                    handle.write('\t'.join(row))
                    handle.write('\n')

        assert filelib.exists_nz(outfile), (
            'the output file %s for classify_with_weighted_voting fails' % outfile
        )