Exemple #1
0
 def build(self):
     """Collect the selected icSHAPE parameter sets and set up ScoreStructure.

     Parameter sets are loaded from ``selected_models/icSHAPE/<name>.json``
     for each listed model data name. Entries whose ``window_size`` is 160
     or more are skipped. For the rest, the original ``data_name`` is kept
     under ``model_data_name`` and the entry is relabelled with
     ``data_name='All'`` and ``experiment_type='Known'`` before being
     appended to ``self.paramlist``.
     """
     model_data_names = (
         'Lu_2016_invitro', 'Lu_2016_invivo',
         'Lu_2016_invitro_published', 'Lu_2016_invivo_published',
         'Spitale_2015_invitro', 'Spitale_2015_invivo',
     )
     self.paramlist = []
     for model_data_name in model_data_names:
         json_path = 'selected_models/icSHAPE/{}.json'.format(model_data_name)
         for params in ParamFile(json_path).to_list():
             if params['window_size'] >= 160:
                 continue
             # The right-hand ``params['data_name']`` is read before the
             # update overwrites that key with 'All'.
             params.update({
                 'model_data_name': params['data_name'],
                 'model_experiment_type': 'icSHAPE',
                 'data_name': 'All',
                 'experiment_type': 'Known',
             })
             self.paramlist.append(params)

     # Path fragment shared by the input directory and the report file.
     run_key = ('{model_experiment_type},{model_data_name}/'
                'p={percentile},w={window_size},m={model_name}')
     self.tool = ScoreStructure()
     tool = self.tool
     tool.unique_name = 'd={data_name},md={model_data_name},p={percentile},w={window_size},m={model_name}'
     tool.inputs['indir'] = InputFile(
         'output/RME/Known,{data_name}/' + run_key)
     tool.outputs['outfile'] = OutputFile(
         'reports/StructurePredictionMetrics/RME/Known,{data_name}/' +
         run_key + '.txt')
Exemple #2
0
    def build(self):
        """Build one parameter set per (selected model, sequence) and set up RME.

        Sequence names are the whitespace-separated tokens of
        ``data/Known/names.txt``. Parameter sets are loaded from
        ``selected_models/icSHAPE/<name>.json``; entries whose
        ``window_size`` is 160 or more are skipped. Each remaining entry is
        relabelled (original ``data_name`` moved to ``model_data_name``,
        ``data_name='All'``, ``experiment_type='Known'``), given fixed
        ``m``/``gamma1``/``gamma2`` values of 0.1, and then copied once per
        sequence name into ``self.paramlist``.
        """
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open('data/Known/names.txt') as names_file:
            sequence_names = names_file.read().split()
        self.paramlist = []
        for model_data_name in [
                'Lu_2016_invitro', 'Lu_2016_invivo',
                'Lu_2016_invitro_published', 'Lu_2016_invivo_published',
                'Spitale_2015_invitro', 'Spitale_2015_invivo'
        ]:
            model_paramlist = ParamFile(
                'selected_models/icSHAPE/{}.json'.format(
                    model_data_name)).to_list()

            for params in model_paramlist:
                if params['window_size'] >= 160:
                    continue
                # Preserve the model's own data name before overwriting it.
                params['model_data_name'] = params['data_name']
                params['model_experiment_type'] = 'icSHAPE'
                params['data_name'] = 'All'
                params['experiment_type'] = 'Known'
                params['m'] = 0.1
                params['gamma1'] = 0.1
                params['gamma2'] = 0.1
                for name in sequence_names:
                    # Shallow copy so every sequence gets its own dict.
                    params_seq = dict(params)
                    params_seq['sequence_name'] = name
                    self.paramlist.append(params_seq)
        self.tool = RME()
        self.tool.unique_name = 'd={data_name},md={model_data_name},p={percentile},w={window_size},m={model_name},s={sequence_name}'
        self.tool.inputs['infile'] = InputFile(
            'output/deepfold/Known,{data_name}/{model_experiment_type},{model_data_name}/p={percentile},w={window_size},m={model_name}/{sequence_name}'
        )
        self.tool.outputs['outdir'] = OutputFile(
            'output/RME/Known,{data_name}/{model_experiment_type},{model_data_name}/p={percentile},w={window_size},m={model_name}'
        )
Exemple #3
0
 def build(self):
     """Build one parameter set per file in ``data/Known/fasta`` and set up MaxExpect.

     Each directory entry's name, with its last extension removed, is used
     as a ``sequence_name``; ``data_name`` is always ``'All'``.
     """
     sequence_dir = 'data/Known/fasta'
     # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
     # which would be exhausted after a single pass by the consumer.
     sequence_names = [
         os.path.splitext(filename)[0]
         for filename in os.listdir(sequence_dir)
     ]
     self.paramlist = ParamGrid({
         'data_name': ['All'],
         'sequence_name': sequence_names
     }).to_list()
     self.tool = MaxExpect()
     self.tool.unique_name = 'd={data_name},s={sequence_name}'
     self.tool.inputs['infile'] = InputFile(
         'data/Known/fasta/{sequence_name}.fa')
     self.tool.outputs['pfsfile'] = OutputFile(
         'output/MaxExpect/Known/{sequence_name}.pfs')
     self.tool.outputs['outfile'] = OutputFile(
         'output/MaxExpect/Known/{sequence_name}.ct')
Exemple #4
0
 def build(self):
     """Declare no inputs and a single ``outfile`` output.

     The output path is a template with ``{experiment_type}``,
     ``{data_name}`` and ``{region}`` placeholders.
     """
     self.inputs = {}
     report_template = (
         'reports/MetricTable/{experiment_type}/d={data_name},r={region}.txt')
     self.outputs = {'outfile': OutputFile(report_template)}
Exemple #5
0
 def build(self):
     """Declare the icSHAPE dataset input and the two trained-model outputs.

     ``cvdir`` uses the same path template as ``model_file`` with a
     ``.cv`` suffix appended.
     """
     dataset = InputFile(
         'data/icSHAPE/{data_name}/deepfold/r={region},p={percentile},w={window_size}'
     )
     self.inputs = {'infile': dataset}

     model_template = 'trained_models/icSHAPE/{data_name}/r={region},p={percentile},w={window_size},m={model_name}'
     cv_output = OutputFile(model_template + '.cv')
     model_output = OutputFile(model_template)
     self.outputs = {'cvdir': cv_output, 'model_file': model_output}
Exemple #6
0
 def build(self):
     """Declare the known-structure inputs and the windowed dataset output.

     Every path is a template under ``data/Known/{data_name}/``; the
     output additionally uses the ``{window_size}`` placeholder.
     """
     known_dir = 'data/Known/{data_name}/'
     structures = InputFile(known_dir + 'known.h5')
     sequences = InputFile(known_dir + 'sequences.fa')
     self.inputs = {'infile': structures, 'sequence_file': sequences}
     dataset = OutputFile(known_dir + 'deepfold/w={window_size}')
     self.outputs = {'outfile': dataset}
Exemple #7
0
 def build(self):
     """Declare the windowed Known dataset input and the trained-model output.

     Both paths are templates keyed by ``{data_name}`` and
     ``{window_size}``; the model path also uses ``{model_name}``.
     """
     dataset = InputFile('data/Known/{data_name}/deepfold/w={window_size}')
     self.inputs = {'infile': dataset}
     model = OutputFile(
         'trained_models/Known/{data_name}/w={window_size},m={model_name}')
     self.outputs = {'model_file': model}
Exemple #8
0
 def build(self):
     """Declare the icSHAPE region input, the transcript FASTA and the dataset output.

     The output path template ends in ``dense=1``.
     """
     region_data = InputFile('data/icSHAPE/{data_name}/{region}')
     # NOTE(review): absolute, user-specific path; this only resolves on a
     # machine that has this directory layout.
     transcripts = InputFile(
         '/Share/home/shibinbin/data/gtf/gencode.{gencode_version}/sequences/{region}.transcript.fa'
     )
     self.inputs = {'infile': region_data, 'sequence_file': transcripts}

     dataset = OutputFile(
         'data/icSHAPE/{data_name}/deepfold/r={region},p={percentile},w={window_size},dense=1'
     )
     self.outputs = {'outfile': dataset}
Exemple #9
0
 def build(self):
     """Declare the trained model and ``data/Known/ct`` inputs and the output directory.

     The model path is a template ending in ``.h5``; the output directory
     template is keyed by the data name and the model's own parameters.
     """
     model = InputFile(
         'trained_models/{model_experiment_type}/{model_data_name}/p={percentile},w={window_size},m={model_name}.h5'
     )
     structures = InputFile('data/Known/ct')
     self.inputs = {'model_file': model, 'infile': structures}

     predictions = OutputFile(
         'output/deepfold/Known,{data_name}/{model_experiment_type},{model_data_name}/p={percentile},w={window_size},m={model_name}'
     )
     self.outputs = {'outdir': predictions}
Exemple #10
0
 def build(self):
     """Declare the dataset and model inputs and the cross-metrics output.

     The dataset path is keyed by ``{experiment_type}``/``{data_name}``,
     the model path by ``{model_experiment_type}``/``{model_data_name}``,
     and the output path combines both sets of placeholders.
     """
     dataset = InputFile(
         'data/{experiment_type}/{data_name}/deepfold/p={percentile},w={window_size}'
     )
     model = InputFile(
         'trained_models/{model_experiment_type}/{model_data_name}/p={percentile},w={window_size},m={model_name}'
     )
     self.inputs = {'infile': dataset, 'model_file': model}

     metrics = OutputFile(
         'metrics/cross/{experiment_type},{data_name}/{model_experiment_type},{model_data_name}/p={percentile},w={window_size},m={model_name}'
     )
     self.outputs = {'outfile': metrics}
Exemple #11
0
    def generate_commands(self, params, task_name=None, command_only=False):
        """Pick the preprocessing command for ``params`` and delegate to the parent.

        When ``params['dense']`` is truthy the ``outfile`` output is replaced
        by the ``dense=1`` path template. ``self.command`` is rewritten when
        ``bumhmm`` or ``dense`` is truthy:

        * ``dense`` adds ``--dense-output`` and ``--min-coverage 0.05``;
        * ``bumhmm`` adds ``--cutoff1 0.4`` and ``--cutoff2 0.6``;
          otherwise ``--percentile {percentile}`` is used.

        When neither flag is truthy, ``self.command`` is left untouched.
        The return value is whatever the parent ``generate_commands``
        returns.
        """
        dense = params.get('dense')
        bumhmm = params.get('bumhmm')

        if dense:
            self.outputs['outfile'] = OutputFile(
                'data/icSHAPE/{data_name}/deepfold/r={region},p={percentile},w={window_size},dense=1'
            )

        if bumhmm or dense:
            # One option per line; the variants differ only in the middle.
            options = [
                'bin/preprocess.py CreateDatasetFromGenomicData',
                '-i {infile}',
                '--sequence-file {sequence_file}',
                '--stride 1',
                '--train-test-split 0.8',
                '--seed 24663',
            ]
            if dense:
                options.extend(['--dense-output', '--min-coverage 0.05'])
            if bumhmm:
                options.extend(['--cutoff1 0.4', '--cutoff2 0.6'])
            else:
                options.append('--percentile {percentile}')
            options.extend(['--window-size {window_size}', '-o {outfile}'])
            self.command = '\n'.join(options)

        # NOTE(review): super(self.__class__, self) recurses endlessly if
        # this method is inherited by a subclass; naming the defining class
        # explicitly would avoid that.
        parent = super(self.__class__, self)
        return parent.generate_commands(params, task_name, command_only)
Exemple #12
0
 def build(self):
     """Declare an ``infile`` input and an ``outdir`` output, both built with no arguments."""
     source = InputFile()
     self.inputs = {'infile': source}
     destination = OutputFile()
     self.outputs = {'outdir': destination}