from dataLoader import get_batch from utils import calc_accuracy,get_parameter_list index = int(random.random() * 10000) logging.getLogger().setLevel(logging.INFO) logging.basicConfig(level=logging.INFO) logging.info('# Config') config = Config() cfg = config.get_args() #TODO Save config logging.info('# Load model') parameter_list = get_parameter_list(cfg) for param in parameter_list: for fold in range(cfg.fold_num): model_output = '%s_fold%d_epoch%2dL_devAcc%.2f' % (cfg.version, fold, epoch, accuracy) parameter_string = 'layer_num_%d_cell_num_%d_dropout_%.2f' % (param['layer_num'], param['cell_num'], param['dropout']) ckpt_path = os.path.join(cfg.result_path, cfg.version, 'index{}_models'.format(str(index)), parameter_string, str(fold)) logging.info('#Preprocessing train/eval batches') train_batches, num_train_batches, num_train_samples = get_batch(cfg.data_npy_path, cfg.filename_x_train, cfg.filename_y_train, cfg.epochs, cfg.maxlen, cfg.len_wv, cfg.batch_size[0],
def _project_experimentstable_experiments(dbs, confs, raw=True, where=False):
    """Collect the experiments of a project, grouped by parameter values.

    The project is taken from ``confs['configurations'][0]['projectid']``.
    One metadata dict is built per database row; the dicts are grouped in
    the returned dictionary under the value that ``get_parameter_values``
    produced for them.

    dbs   -- mapping indexed by project id, then by 'RNAseqPipelineCommon',
             yielding an object with a ``query(sql)`` method
    confs -- configuration dictionary, also handed to the helper functions
    raw   -- when false, ``get_experiment_labels`` is called on every
             metadata dict before it is grouped
    where -- when true, the restriction comes from ``get_experiment_where``
             instead of a plain ``project_id`` comparison
    """
    conf = confs['configurations'][0]

    # Only return the experiment infos if this is an official project
    sql = (
        " select experiment_id, species_info.species, genome_files.genome, "
        "genome_files.location, genome_files.assembly, genome_files.gender, "
        "annotation_files.annotation, annotation_files.location, "
        "annotation_files.version, template_file, read_length, mismatches, "
        "exp_description, expDate, CellType, RNAType, Compartment, "
        "Bioreplicate, partition, annotation_version, lab, paired "
        "from experiments, species_info, genome_files, annotation_files "
    )

    # Restrict the rows: either through the helper-built where clause or
    # through the project id alone.
    if where:
        meta = get_experiment_dict(confs)
        sql = "%s %s and " % (sql, get_experiment_where(confs, meta))
    else:
        sql = "%s where project_id = '%s' and " % (sql, conf['projectid'])

    # Join conditions tying the experiments to the three lookup tables.
    sql = (
        "%s experiments.species_id = species_info.species_id and "
        "experiments.genome_id = genome_files.genome_id and "
        "experiments.annotation_id = annotation_files.annotation_id "
    ) % sql
    sql = "%s %s" % (sql, get_experiment_order_by(confs))

    cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
    rows = cursor.fetchall()
    cursor.close()

    grouped = {}
    rna_extracts = get_rna_extract_display_mapping(dbs)
    cells = get_cell_display_mapping(dbs)
    localizations = get_localization_display_mapping(dbs)

    for row in rows:
        # Indices refer to the column positions of the select list above.
        meta = {
            'projectid': conf['projectid'],
            'read_length': row[10],
            'cell': row[14],
            'rnaExtract': row[15],
            'localization': row[16],
            'bio_replicate': row[17],
            'partition': row[18],
            'annotation_version': row[19],
            'lab': row[20],
            'paired': row[21],
        }
        # A non-null "paired" value is stored as its ordinal.
        if meta['paired'] is not None:
            meta['paired'] = ord(meta['paired'])
        meta['parameter_list'] = get_parameter_list(confs)
        meta['parameter_values'] = get_parameter_values(confs, meta)
        if not raw:
            get_experiment_labels(meta, rna_extracts, cells, localizations)
        # The key is read after the optional labelling step, as before.
        grouped.setdefault(meta['parameter_values'], []).append(meta)

    return grouped
def project_replicates(dbs, confs):
    """Build the replicate table of the configured project.

    Returns a dictionary with two entries:

    table_description -- list of (column title, column type) tuples
    table_data        -- one list per database row, with the "paired"
                         column converted through ``ord`` when it is not
                         None and the replicate URL appended as last item
    """
    conf = confs['configurations'][0]
    projectid = conf['projectid']

    description = [('Project Id', 'string'),
                   ('Replicate Id', 'string'),
                   ('Species', 'string'),
                   ('Genome file name', 'string'),
                   ('Genome file location', 'string'),
                   ('Genome assembly', 'string'),
                   ('Genome gender', 'string'),
                   ('Annotation file name', 'string'),
                   ('Annotation file location', 'string'),
                   ('Annotation version', 'string'),
                   ('Template File', 'string'),
                   ('Read Length', 'number'),
                   ('Mismatches', 'number'),
                   ('Replicate Description', 'string'),
                   ('Replicate Date', 'string'),
                   ('Cell Type', 'string'),
                   ('RNA Type', 'string'),
                   ('Localization', 'string'),
                   ('Bioreplicate', 'string'),
                   ('Partition', 'string'),
                   ('Annotation Version', 'string'),
                   ('Lab', 'string'),
                   ('Paired', 'number'),
                   ('URL', 'string'),
                   ]

    sql = (
        " select project_id, experiment_id, species_info.species, "
        "genome_files.genome, genome_files.location, genome_files.assembly, "
        "genome_files.gender, annotation_files.annotation, "
        "annotation_files.location, annotation_files.version, template_file, "
        "read_length, mismatches, exp_description, expDate, CellType, RNAType, "
        "Compartment, Bioreplicate, partition, annotation_version, lab, paired "
        "from experiments, species_info, genome_files, annotation_files "
        "where project_id='%s' and "
        "experiments.species_id = species_info.species_id and "
        "experiments.genome_id = genome_files.genome_id and "
        "experiments.annotation_id = annotation_files.annotation_id; "
    ) % projectid

    cursor = dbs[projectid]['RNAseqPipelineCommon'].query(sql)
    fetched = cursor.fetchall()
    cursor.close()

    url_template = ('/project/%(projectid)s/'
                    '%(parameter_list)s/%(parameter_values)s/'
                    'replicate/%(replicateid)s')

    # Metadata key -> position of the matching column in the select list.
    meta_columns = (
        ('projectid', 0),
        ('replicateid', 1),
        ('read_length', 11),
        ('cell', 15),
        ('rnaExtract', 16),
        ('localization', 17),
        ('bio_replicate', 18),
        ('partition', 19),
        ('annotation_version', 20),
        ('lab', 21),
        ('paired', 22),
    )

    table_data = []
    for record in fetched:
        record = list(record)
        # A non-null "paired" value is stored as its ordinal.
        if record[22] is not None:
            record[22] = ord(record[22])
        # Augment the information from the database with a url
        meta = {key: record[position] for key, position in meta_columns}
        meta['parameter_list'] = get_parameter_list(confs)
        meta['parameter_values'] = get_parameter_values(confs, meta)
        table_data.append(record + [url_template % meta])

    return {'table_description': description, 'table_data': table_data}
def project_experiments(dbs, confs):
    """Query the database for a list of experiments for a project.

    The project id is read from ``confs['configurations'][0]['projectid']``.

    Returns a dictionary with two entries:

    table_description -- list of (column title, column type) tuples
    table_data        -- one list per database row, with the "paired"
                         column converted through ``ord`` when it is not
                         None and the experiment URL appended as last item
    """
    conf = confs['configurations'][0]
    projectid = conf['projectid']
    chart = {}
    chart['table_description'] = [('Project Id', 'string'),
                                  ('Replicate Id', 'string'),
                                  ('Species', 'string'),
                                  ('Genome file name', 'string'),
                                  ('Genome file location', 'string'),
                                  ('Genome assembly', 'string'),
                                  ('Genome gender', 'string'),
                                  ('Annotation file name', 'string'),
                                  ('Annotation file location', 'string'),
                                  ('Annotation version', 'string'),
                                  ('Template File', 'string'),
                                  ('Read Length', 'number'),
                                  ('Mismatches', 'number'),
                                  ('Replicate Description', 'string'),
                                  ('Replicate Date', 'string'),
                                  ('Cell Type', 'string'),
                                  ('RNA Type', 'string'),
                                  ('Localization', 'string'),
                                  ('Bioreplicate', 'string'),
                                  ('Partition', 'string'),
                                  ('Annotation Version', 'string'),
                                  ('Lab', 'string'),
                                  ('Paired', 'number'),
                                  ('URL', 'string'),
                                  ]

    # NOTE(review): the project id is interpolated straight into the SQL
    # text. Whether ``query`` accepts bound parameters is not visible from
    # here, so the statement is left as it was -- confirm that projectid
    # can never carry untrusted input.
    sql = (
        " select project_id, experiment_id, species_info.species, "
        "genome_files.genome, genome_files.location, genome_files.assembly, "
        "genome_files.gender, annotation_files.annotation, "
        "annotation_files.location, annotation_files.version, template_file, "
        "read_length, mismatches, exp_description, expDate, CellType, RNAType, "
        "Compartment, Bioreplicate, partition, annotation_version, lab, paired "
        "from experiments, species_info, genome_files, annotation_files "
        "where project_id='%s' and "
        "experiments.species_id = species_info.species_id and "
        "experiments.genome_id = genome_files.genome_id and "
        "experiments.annotation_id = annotation_files.annotation_id; "
    ) % projectid

    cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
    try:
        rows = cursor.fetchall()
    finally:
        # Release the cursor even when fetching fails.
        cursor.close()

    results = []
    url = '/project/%(projectid)s/'
    url += '%(parameter_list)s/%(parameter_values)s'
    for row in rows:
        # Augment the information from the database with a url and a text
        row = list(row)
        # A non-null "paired" value is stored as its ordinal.
        if row[22] is not None:
            row[22] = ord(row[22])
        meta = {'projectid': row[0],
                'read_length': row[11],
                'cell': row[15],
                # The other experiment/replicate listings in this module
                # hand the RNA type to get_parameter_values under the key
                # 'rnaExtract'; provide that spelling here as well so the
                # helper sees the same keys. The historical 'rna_extract'
                # key is kept for anything that still reads it.
                'rnaExtract': row[16],
                'rna_extract': row[16],
                'localization': row[17],
                'bio_replicate': row[18],
                'partition': row[19],
                'annotation_version': row[20],
                'lab': row[21],
                'paired': row[22]}
        meta['parameter_list'] = get_parameter_list(confs)
        meta['parameter_values'] = get_parameter_values(confs, meta)
        row.append(url % meta)
        results.append(row)
    chart['table_data'] = results
    return chart