from dataLoader import get_batch
from utils import calc_accuracy,get_parameter_list

index = int(random.random() * 10000)
logging.getLogger().setLevel(logging.INFO)
logging.basicConfig(level=logging.INFO)

logging.info('# Config')
config = Config()
cfg = config.get_args()
# TODO: save the config



logging.info('# Load model')
parameter_list = get_parameter_list(cfg)
for param in parameter_list:

	for fold in range(cfg.fold_num):

		model_output = '%s_fold%d_epoch%2dL_devAcc%.2f' % (cfg.version, fold, epoch, accuracy)
		parameter_string = 'layer_num_%d_cell_num_%d_dropout_%.2f' % (param['layer_num'],
																	  param['cell_num'],
																	  param['dropout'])
		ckpt_path = os.path.join(cfg.result_path, cfg.version, 'index{}_models'.format(str(index)), parameter_string,
								 str(fold))

		logging.info('#Preprocessing train/eval batches')
		train_batches, num_train_batches, num_train_samples = get_batch(cfg.data_npy_path, cfg.filename_x_train,
																		cfg.filename_y_train, cfg.epochs,
																		cfg.maxlen, cfg.len_wv, cfg.batch_size[0],
Example #2
0
def _project_experimentstable_experiments(dbs, confs, raw=True, where=False):
    """Return the experiments of a project, grouped by parameter values.

    The query is run against the ``RNAseqPipelineCommon`` database of the
    project named by ``confs['configurations'][0]['projectid']``.

    Arguments:
        dbs: nested mapping ``dbs[projectid]['RNAseqPipelineCommon']`` whose
            value offers ``query(sql)`` returning a cursor.
        confs: configuration dict; ``confs['configurations'][0]`` supplies
            the project id, and ``confs`` itself is handed to the
            ``get_experiment_*`` / ``get_parameter_*`` helpers.
        raw: when false, ``get_experiment_labels`` is called on every
            ``meta`` dict before it is stored.
        where: when true, the filter clause comes from
            ``get_experiment_where``; otherwise the query is restricted to
            the project id.

    Returns:
        A dict mapping each ``meta['parameter_values']`` to the list of
        ``meta`` dicts that share that value, in row order.
    """
    conf = confs['configurations'][0]
    # Only return the experiment infos if this is an official project
    sql = """
select experiment_id,
       species_info.species,
       genome_files.genome,
       genome_files.location,
       genome_files.assembly,
       genome_files.gender,
       annotation_files.annotation,
       annotation_files.location,
       annotation_files.version,
       template_file,
       read_length,
       mismatches,
       exp_description,
       expDate,
       CellType,
       RNAType,
       Compartment,
       Bioreplicate,
       partition,
       annotation_version,
       lab,
       paired
from experiments,
     species_info,
     genome_files,
     annotation_files
"""
    if where:
        meta = get_experiment_dict(confs)
        sql = """%s
%s
and
""" % (sql, get_experiment_where(confs, meta))
    else:
        # NOTE(review): the project id is interpolated directly into the SQL
        # text. Confirm it is validated upstream, or switch to a
        # parameterized query if the db wrapper supports one.
        sql = """%s
where
    project_id = '%s'
and
""" % (sql, conf['projectid'])

    # Join conditions; they follow the trailing "and" of either branch above.
    sql = """%s
      experiments.species_id = species_info.species_id
and
      experiments.genome_id = genome_files.genome_id
and
      experiments.annotation_id = annotation_files.annotation_id
""" % sql

    sql = """%s
%s""" % (sql, get_experiment_order_by(confs))

    cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
    try:
        rows = cursor.fetchall()
    finally:
        # Release the cursor even when fetching the rows fails.
        cursor.close()
    experimentids = {}

    rna_extracts = get_rna_extract_display_mapping(dbs)
    cells = get_cell_display_mapping(dbs)
    localizations = get_localization_display_mapping(dbs)

    for row in rows:
        # Indices follow the column order of the select list above.
        meta = {}
        meta['projectid'] = conf['projectid']
        meta['read_length'] = row[10]
        meta['cell'] = row[14]
        meta['rnaExtract'] = row[15]
        meta['localization'] = row[16]
        meta['bio_replicate'] = row[17]
        meta['partition'] = row[18]
        meta['annotation_version'] = row[19]
        meta['lab'] = row[20]
        meta['paired'] = row[21]
        if meta['paired'] is not None:
            # The column arrives as a single character; store its code point.
            meta['paired'] = ord(meta['paired'])
        meta['parameter_list'] = get_parameter_list(confs)
        meta['parameter_values'] = get_parameter_values(confs, meta)

        if not raw:
            get_experiment_labels(meta, rna_extracts, cells, localizations)

        # Group the rows that share the same parameter values.
        experimentids.setdefault(meta['parameter_values'], []).append(meta)
    return experimentids
Example #3
0
def project_replicates(dbs, confs):
    """Compile the list of replicates for the project.

    Arguments:
        dbs: nested mapping ``dbs[projectid]['RNAseqPipelineCommon']`` whose
            value offers ``query(sql)`` returning a cursor.
        confs: configuration dict; ``confs['configurations'][0]`` supplies
            the project id, and ``confs`` itself is handed to
            ``get_parameter_list`` and ``get_parameter_values``.

    Returns:
        A dict with two keys: ``'table_description'``, a list of
        ``(label, type)`` column descriptions, and ``'table_data'``, a list
        with one entry per database row. Each entry is the row as a list
        (with the ``paired`` column converted through ``ord`` when it is not
        ``None``) followed by the replicate URL.
    """
    conf = confs['configurations'][0]
    projectid = conf['projectid']
    description = [('Project Id', 'string'),
                   ('Replicate Id', 'string'),
                   ('Species', 'string'),
                   ('Genome file name', 'string'),
                   ('Genome file location', 'string'),
                   ('Genome assembly', 'string'),
                   ('Genome gender', 'string'),
                   ('Annotation file name', 'string'),
                   ('Annotation file location', 'string'),
                   ('Annotation version', 'string'),
                   ('Template File', 'string'),
                   ('Read Length', 'number'),
                   ('Mismatches', 'number'),
                   ('Replicate Description', 'string'),
                   ('Replicate Date', 'string'),
                   ('Cell Type', 'string'),
                   ('RNA Type', 'string'),
                   ('Localization', 'string'),
                   ('Bioreplicate', 'string'),
                   ('Partition', 'string'),
                   ('Annotation Version', 'string'),
                   ('Lab', 'string'),
                   ('Paired', 'number'),
                   ('URL', 'string'),
                   ]
    chart = {}
    chart['table_description'] = description

    # NOTE(review): the project id is interpolated directly into the SQL
    # text. Confirm it is validated upstream, or switch to a parameterized
    # query if the db wrapper supports one.
    sql = """
select project_id,
       experiment_id,
       species_info.species,
       genome_files.genome,
       genome_files.location,
       genome_files.assembly,
       genome_files.gender,
       annotation_files.annotation,
       annotation_files.location,
       annotation_files.version,
       template_file,
       read_length,
       mismatches,
       exp_description,
       expDate,
       CellType,
       RNAType,
       Compartment,
       Bioreplicate,
       partition,
       annotation_version,
       lab,
       paired
from experiments,
     species_info,
     genome_files,
     annotation_files
where
      project_id='%s'
and
      experiments.species_id = species_info.species_id
and
      experiments.genome_id = genome_files.genome_id
and
      experiments.annotation_id = annotation_files.annotation_id;
""" % projectid
    cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
    try:
        rows = cursor.fetchall()
    finally:
        # Release the cursor even when fetching the rows fails.
        cursor.close()

    url = ('/project/%(projectid)s/'
           '%(parameter_list)s/%(parameter_values)s/'
           'replicate/%(replicateid)s')
    results = []
    for row in rows:
        # Work on a list copy so the paired column can be overwritten.
        row = list(row)
        if row[22] is not None:
            # The column arrives as a single character; store its code point.
            row[22] = ord(row[22])
        # Augment the information from the database with a url and a text
        # (indices follow the column order of the select list above).
        meta = {'projectid': row[0],
                'replicateid': row[1],
                'read_length': row[11],
                'cell': row[15],
                'rnaExtract': row[16],
                'localization': row[17],
                'bio_replicate': row[18],
                'partition': row[19],
                'annotation_version': row[20],
                'lab': row[21],
                'paired': row[22],
                }
        meta['parameter_list'] = get_parameter_list(confs)
        meta['parameter_values'] = get_parameter_values(confs, meta)
        results.append(row + [url % meta])
    chart['table_data'] = results
    return chart
Example #4
0
def project_experiments(dbs, confs):
    """Query the database for a list of experiments for a project.

    Arguments:
        dbs: nested mapping ``dbs[projectid]['RNAseqPipelineCommon']`` whose
            value offers ``query(sql)`` returning a cursor.
        confs: configuration dict; ``confs['configurations'][0]`` supplies
            the project id, and ``confs`` itself is handed to
            ``get_parameter_list`` and ``get_parameter_values``.

    Returns:
        A dict with two keys: ``'table_description'``, a list of
        ``(label, type)`` column descriptions, and ``'table_data'``, a list
        with one entry per database row. Each entry is the row as a list
        (with the ``paired`` column converted through ``ord`` when it is not
        ``None``) followed by the experiment URL.
    """
    conf = confs['configurations'][0]
    projectid = conf['projectid']

    chart = {}
    chart['table_description'] = [('Project Id', 'string'),
                                  ('Replicate Id', 'string'),
                                  ('Species', 'string'),
                                  ('Genome file name', 'string'),
                                  ('Genome file location', 'string'),
                                  ('Genome assembly', 'string'),
                                  ('Genome gender', 'string'),
                                  ('Annotation file name', 'string'),
                                  ('Annotation file location', 'string'),
                                  ('Annotation version', 'string'),
                                  ('Template File', 'string'),
                                  ('Read Length', 'number'),
                                  ('Mismatches', 'number'),
                                  ('Replicate Description', 'string'),
                                  ('Replicate Date', 'string'),
                                  ('Cell Type', 'string'),
                                  ('RNA Type', 'string'),
                                  ('Localization', 'string'),
                                  ('Bioreplicate', 'string'),
                                  ('Partition', 'string'),
                                  ('Annotation Version', 'string'),
                                  ('Lab', 'string'),
                                  ('Paired', 'number'),
                                  ('URL', 'string'),
                                  ]

    # NOTE(review): the project id is interpolated directly into the SQL
    # text. Confirm it is validated upstream, or switch to a parameterized
    # query if the db wrapper supports one.
    sql = """
select project_id,
       experiment_id,
       species_info.species,
       genome_files.genome,
       genome_files.location,
       genome_files.assembly,
       genome_files.gender,
       annotation_files.annotation,
       annotation_files.location,
       annotation_files.version,
       template_file,
       read_length,
       mismatches,
       exp_description,
       expDate,
       CellType,
       RNAType,
       Compartment,
       Bioreplicate,
       partition,
       annotation_version,
       lab,
       paired
from experiments,
     species_info,
     genome_files,
     annotation_files
where
      project_id='%s'
and
      experiments.species_id = species_info.species_id
and
      experiments.genome_id = genome_files.genome_id
and
      experiments.annotation_id = annotation_files.annotation_id;
""" % projectid
    cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
    try:
        rows = cursor.fetchall()
    finally:
        # Release the cursor even when fetching the rows fails.
        cursor.close()
    results = []
    url = '/project/%(projectid)s/'
    url += '%(parameter_list)s/%(parameter_values)s'
    for row in rows:
        # Augment the information from the database with a url and a text
        row = list(row)
        if row[22] is not None:
            # The column arrives as a single character; store its code point.
            row[22] = ord(row[22])
        # Indices follow the column order of the select list above.
        meta = {'projectid': row[0],
                'read_length': row[11],
                'cell': row[15],
                # project_replicates and
                # _project_experimentstable_experiments hand the RNA type to
                # get_parameter_values under 'rnaExtract'; provide the same
                # key here so the helper sees a consistent dict.
                'rnaExtract': row[16],
                # Original spelling kept in case anything still reads it.
                'rna_extract': row[16],
                'localization': row[17],
                'bio_replicate': row[18],
                'partition': row[19],
                'annotation_version': row[20],
                'lab': row[21],
                'paired': row[22]}
        meta['parameter_list'] = get_parameter_list(confs)
        meta['parameter_values'] = get_parameter_values(confs, meta)
        row.append(url % meta)
        results.append(row)
    chart['table_data'] = results
    return chart