def save_template(plate_id, template):
    """Store the used wells of a plate template as PlateCell rows.

    Wells whose 'prey' entry is falsy are skipped. A well whose prey equals
    POS_CONTROL or NEG_CONTROL is stored as the matching control, with no
    bait or prey; every other well is stored with its bait and prey.
    The session is committed once all cells have been added.

    Args:
        plate_id: id stored on each created PlateCell.
        template: mapping indexed by the cell key yielded by iterate96WP();
            each entry is read through its 'prey' and 'bait' keys.
    """
    for cell, row, col in iterate96WP():
        well = template[cell]
        well_prey = well['prey']
        if not well_prey:
            # Unused well: nothing to store
            continue

        fields = {'bait': None, 'prey': None, 'is_NC': False, 'is_PC': False}
        if well_prey == POS_CONTROL:
            fields['is_PC'] = True
        elif well_prey == NEG_CONTROL:
            fields['is_NC'] = True
        else:
            # Regular well: 'bait' is only looked up for non-control wells
            fields['bait'] = well['bait']
            fields['prey'] = well_prey

        db.add(PlateCell(row=row, column=col, plate_id=plate_id, **fields))

    db.commit()
def init_db(proteins_list):
    """Reset the database and load the reference protein list and batches.

    The 'Proteins List' sheet is read and checked (well-formed ids, no
    duplicated ids, a family on every row), then bulk-inserted into the
    Protein table. Every sheet returned by get_batch_sheets() is then
    imported with import_batch_sheet() and the session is committed.

    Validation is delegated to assert_empty_df(); NOTE(review): it is
    presumably what aborts the import when a check finds offending rows.

    Args:
        proteins_list: workbook handed to read_excel_list(); its ``name``
            attribute is used as the file name for the batch sheets.
    """
    db_reset()

    # Import proteins list
    # (keys look like spreadsheet column letters -- confirm in read_excel_list)
    proteins_mapping = {
        'A': 'family',
        'C': 'id',
        'I': 'symbol',
        'J': 'long_symbol',
        'K': 'description',
    }
    df_proteins = read_excel_list(proteins_list, 'Proteins List',
                                  proteins_mapping)
    df_proteins['id'] = df_proteins['id'].str.upper()
    # Proteins without a symbol fall back to their id. The filled column is
    # assigned back rather than using fillna(inplace=True) on the selected
    # column: that chained in-place form does not update the frame when
    # pandas Copy-on-Write is active.
    df_proteins['symbol'] = df_proteins['symbol'].fillna(df_proteins['id'])

    # Validate data
    id_pattern = "^{}$".format(PROTEIN_ID_REGEX)
    has_valid_id = df_proteins['id'].str.contains(
        id_pattern, regex=True, na=False)
    assert_empty_df(
        df_proteins[~has_valid_id],
        "Some of the provided ids in the protein list are invalid")

    # keep=False flags every occurrence of a repeated id, not only the extras
    df_duplicate_id = df_proteins[df_proteins.duplicated(subset='id',
                                                         keep=False)]
    assert_empty_df(
        df_duplicate_id,
        "Some of the provided ids in the protein list are duplicated")

    df_no_family = df_proteins[df_proteins['family'].isnull()]
    assert_empty_df(
        df_no_family,
        "Some of the proteins in the reference list don't have an assigned family"
    )

    # Insert proteins into DB
    proteins = df_proteins.to_dict(orient='records')
    if proteins:
        # Nothing to insert when the list is empty
        db.execute(Protein.__table__.insert(), proteins)

    # Import batch sheets
    batch_mapping = {
        'A': '#',
        'B': 'subfamily',
        'C': 'id',
        'D': 'symbol',
        'E': 'nickname',
        'J': 'cloned',
    }
    for sheet in get_batch_sheets(proteins_list.name):
        import_batch_sheet(proteins_list.name, sheet, batch_mapping,
                           df_proteins['id'])

    db.commit()
def plates_first_batch(first_batch_list, outfolder):
    """Recreate the batch 1 plates from the original first-batch list.

    The plates and the base template come from parse_1st_batch_list().
    Existing Plate rows with bait and prey batch 1 are deleted; then, for
    each parsed plate, a Plate row is stored (ids start at 1, in list order),
    its template is saved with save_template() and the plate is exported
    with export_1st_batch_plate().

    Args:
        first_batch_list: input handed to parse_1st_batch_list().
        outfolder: destination handed to export_1st_batch_plate().
    """
    batch_id = 1
    metadata = OrderedDict([
        ('Notes', 'Generated from the original batch 1 template'),
        ('Batch 1 file', ''),
    ])

    plates, base_template = parse_1st_batch_list(first_batch_list)

    # Index the batch proteins by symbol
    by_symbol = {prot.symbol: prot for prot in get_batch_proteins(batch_id)}
    replace_prey_symbols(base_template, by_symbol)

    # Remove the previously stored plates of this batch from the db
    stale_plates = db.query(Plate).filter(Plate.bait_batch_id == batch_id,
                                          Plate.prey_batch_id == batch_id)
    stale_plates.delete()

    for plate_id, entry in enumerate(plates, start=1):
        # A bait symbol missing from the batch yields None
        first_bait = by_symbol.get(entry['Bait_1'])
        second_bait = by_symbol.get(entry['Bait_2'])
        template = add_baits(base_template, first_bait, second_bait)

        # Plate 4 of the first batch was put turned 180 in the reader
        if entry['filename'] == 'batch 4 SERK1 BAK1':
            template = reverse(template)

        plate_name = screen_plate_name(plate_id)
        metadata['Batch 1 file'] = entry['filename']
        metadata['Timeshift'] = entry['timepoint']

        # Save plate to database
        db.add(Plate(id=plate_id,
                     bait_batch_id=batch_id,
                     prey_batch_id=batch_id))
        db.commit()
        save_template(plate_id, template)

        # TODO: export plate from database?
        export_1st_batch_plate(plate_name, template, metadata, outfolder)
def create_screen_plates(bait_batch_id, prey_batch_id, outfolder):
    """Create the screen plates for a bait batch / prey batch combination.

    The bait proteins are taken in groups of ScreenPlate.capacity(). Each
    group gets a new Plate row and a screen template exported to an .xlsx
    file in outfolder. All screen plates share one prey storage plate.
    The session is committed after the last plate.

    Args:
        bait_batch_id: batch providing the bait proteins.
        prey_batch_id: batch providing the prey proteins.
        outfolder: directory where the template files are written.
    """
    metadata = OrderedDict([
        ('Plate name', ''),
        ('Plate type', 'Screen'),
        ('Timeshift', ''),
        ('Bait plate', ''),
        ('Prey plate', storage_prey_plate_name(prey_batch_id)),
    ])

    # Same prey plate for all screen plates
    prey_plate = PreyStoragePlate(get_batch_proteins(prey_batch_id))
    prey_plate.name = storage_prey_plate_name(prey_batch_id)

    # One screen plate per group of ScreenPlate.capacity() bait proteins
    bait_prots = get_batch_proteins(bait_batch_id)
    for start in range(0, len(bait_prots), ScreenPlate.capacity()):
        db_plate = Plate(bait_batch_id=bait_batch_id,
                         prey_batch_id=prey_batch_id)
        db.add(db_plate)
        # Flushed before db_plate.id is read below (presumably so that the
        # database has assigned the id -- confirm against the Plate model)
        db.flush()

        baits = bait_prots[start:start + ScreenPlate.capacity()]
        bait_plate_name = storage_bait_plate_name(
            bait_batch_id, BaitStoragePlate.bait_plate_index(start))
        screen_plate = ScreenPlate(baits, bait_plate_name,
                                   BaitStoragePlate.bait_plate_offset(start),
                                   prey_plate)
        screen_plate.name = screen_plate_name(db_plate.id)

        metadata['Plate name'] = screen_plate.name
        metadata['Bait plate'] = bait_plate_name
        metadata['Prey plate'] = screen_plate.prey_plate_name

        file_name = '{}_b{:02d}_p{:02d}_template.xlsx'.format(
            screen_plate.name, bait_batch_id, prey_batch_id)
        template_file = os.path.join(outfolder, file_name)
        export_screen_plate(template_file, screen_plate, metadata)
        logging.info("Saved screen template to {}".format(template_file))

    # Save to database
    db.commit()
def import_batch_sheet(filename, sheet, mapping, expected_ids):
    """Import a batch sheet, validating it against the reference list.

    The sheet is read with read_excel_list() and checked for incomplete
    rows, malformed ids, duplicated ids and ids missing from expected_ids.
    The matching Protein rows are then updated with the sheet's subfamily,
    symbol and nickname, and one BatchProtein row per sheet row is inserted.

    Validation is delegated to assert_empty_df(); NOTE(review): it is
    presumably what aborts the import when a check finds offending rows.

    Args:
        filename: workbook handed to read_excel_list().
        sheet: sheet name. The batch id is filter_digits(sheet) and the
            batch name is the sheet name without its first len('batch')
            characters, stripped of surrounding whitespace.
        mapping: column mapping handed to read_excel_list().
        expected_ids: ids of the reference protein list; every id in the
            sheet must be among them.
    """
    logging.info("Importing batch sheet '%s' from %s", sheet, filename)

    batch_id = filter_digits(sheet)
    batch_name = sheet[len('batch'):].strip()

    # Import batch proteins
    df_batch = read_excel_list(filename, sheet, mapping)
    df_batch['id'] = df_batch['id'].str.upper()
    # Proteins without a symbol fall back to their id. The filled column is
    # assigned back rather than using fillna(inplace=True) on the selected
    # column: that chained in-place form does not update the frame when
    # pandas Copy-on-Write is active.
    df_batch['symbol'] = df_batch['symbol'].fillna(df_batch['id'])

    # TODO: remove those not cloned?
    logging.debug("Batch contains %d successfully cloned proteins",
                  len(df_batch))

    # Validate data
    required = ['subfamily', 'id', 'symbol', 'nickname']
    df_incomplete = df_batch[df_batch[required].isnull().any(axis=1)]
    assert_empty_df(
        df_incomplete,
        "Some of the proteins in sheet '{}' have incomplete information".
        format(sheet))

    id_pattern = "^{}$".format(PROTEIN_ID_REGEX)
    has_valid_id = df_batch['id'].str.contains(
        id_pattern, regex=True, na=False)
    assert_empty_df(
        df_batch[~has_valid_id],
        "Some of the provided ids in sheet '{}' are invalid".format(sheet),
        ['#', 'id'])

    # keep=False flags every occurrence of a repeated id, not only the extras
    df_duplicate_id = df_batch[df_batch.duplicated(subset='id', keep=False)]
    assert_empty_df(
        df_duplicate_id,
        "Some of the provided ids in sheet '{}' are duplicated".format(sheet),
        ['#', 'id'])

    df_unexpected_id = df_batch[~df_batch['id'].isin(expected_ids)]
    assert_empty_df(
        df_unexpected_id,
        "Some of the provided ids in sheet '{}' are not in the reference protein list"
        .format(sheet), ['#', 'id'])

    # Update the protein info with id, nickname, subfamily and symbol
    updated_fields = ('subfamily', 'symbol', 'nickname')
    for protein in df_batch.to_dict(orient='records'):
        db.query(Protein).\
            filter(Protein.id == protein['id']).\
            update({field: protein[field] for field in updated_fields})
    db.commit()

    # Import the batch info
    # TODO: order number? reindex?
    df_batch_proteins = (
        df_batch[['id', 'nickname', 'cloned']]
        .rename(columns={'id': 'protein_id'})
        .assign(batch_name=batch_name, batch_id=batch_id)
    )
    # The frame index is stored as the position of the protein in the batch
    df_batch_proteins['order'] = df_batch_proteins.index

    batch_proteins = df_batch_proteins.to_dict(orient='records')
    if batch_proteins:
        # Nothing to insert for an empty sheet
        db.execute(BatchProtein.__table__.insert(), batch_proteins)