Beispiel #1
0
def save_template(plate_id, template):
    """Store the occupied wells of a plate template as PlateCell rows.

    Walks the wells yielded by iterate96WP() and, for every well whose
    'prey' entry is truthy, adds one PlateCell to the session. Wells whose
    prey equals POS_CONTROL or NEG_CONTROL are flagged through is_PC /
    is_NC and carry no bait or prey; every other well keeps the bait and
    prey found in the template. One commit is issued after the whole walk.

    :param plate_id: value stored in the plate_id field of each PlateCell.
    :param template: mapping indexed by the cell key from iterate96WP();
        each entry is read with 'prey' and, for non-control wells, 'bait'.
    """
    for cell, row, col in iterate96WP():
        well = template[cell]
        content = well['prey']
        if not content:
            # Nothing in this well, so no row is created for it.
            continue

        # Start from the "control well" defaults and fill in what applies.
        fields = {
            'row': row,
            'column': col,
            'bait': None,
            'prey': None,
            'is_NC': False,
            'is_PC': False,
            'plate_id': plate_id,
        }

        if content == POS_CONTROL:
            fields['is_PC'] = True
        elif content == NEG_CONTROL:
            fields['is_NC'] = True
        else:
            fields['bait'] = well['bait']
            fields['prey'] = content

        db.add(PlateCell(**fields))

    db.commit()
Beispiel #2
0
def init_db(proteins_list):
    """Reset the database and load the reference protein list and batches.

    Steps, in order:
      1. db_reset() is called.
      2. The 'Proteins List' sheet is read, ids are upper-cased and missing
         symbols are replaced by the id.
      3. The list is validated (id format, duplicated ids, missing family)
         through assert_empty_df, which is defined elsewhere and is
         presumably what rejects a non-empty frame.
      4. The proteins are bulk-inserted into the Protein table.
      5. Every sheet returned by get_batch_sheets() is imported with
         import_batch_sheet(), and the session is committed.

    :param proteins_list: workbook handle passed to read_excel_list(); it
        must also expose a ``.name`` attribute, which is what the batch
        sheet import uses (presumably an open file object -- confirm
        against the caller).
    """
    db_reset()

    # Import proteins list. Keys look like spreadsheet column letters and
    # values are the field names used below (read_excel_list is defined
    # elsewhere -- confirm the exact meaning there).
    proteins_mapping = {
        'A': 'family',
        'C': 'id',
        'I': 'symbol',
        'J': 'long_symbol',
        'K': 'description'
    }
    df_proteins = read_excel_list(proteins_list, 'Proteins List',
                                  proteins_mapping)
    df_proteins['id'] = df_proteins['id'].str.upper()
    # Proteins without a symbol fall back to their id. The result is
    # assigned back instead of calling fillna(inplace=True) on the column
    # selection: that chained in-place form warns on recent pandas and does
    # not modify the frame at all under Copy-on-Write.
    df_proteins['symbol'] = df_proteins['symbol'].fillna(df_proteins['id'])

    # Validate data
    df_invalid_id = df_proteins[~df_proteins['id'].str.contains(
        "^{}$".format(PROTEIN_ID_REGEX), regex=True, na=False)]
    assert_empty_df(
        df_invalid_id,
        "Some of the provided ids in the protein list are invalid")

    # keep=False marks every occurrence of a repeated id, not only the
    # later ones, so all offending rows are reported.
    df_duplicate_id = df_proteins[df_proteins.duplicated(subset='id',
                                                         keep=False)]
    assert_empty_df(
        df_duplicate_id,
        "Some of the provided ids in the protein list are duplicated")

    df_no_family = df_proteins[df_proteins['family'].isnull()]
    assert_empty_df(
        df_no_family,
        "Some of the proteins in the reference list don't have an assigned family"
    )

    # Insert proteins into DB (one bulk insert, one dict per row)
    proteins = df_proteins.to_dict(orient='records')
    db.execute(Protein.__table__.insert(), proteins)

    # Import batch sheets
    batch_mapping = {
        'A': '#',
        'B': 'subfamily',
        'C': 'id',
        'D': 'symbol',
        'E': 'nickname',
        'J': 'cloned'
    }

    for sheet in get_batch_sheets(proteins_list.name):
        # The reference ids are handed over so each batch can be checked
        # against the protein list loaded above.
        import_batch_sheet(proteins_list.name, sheet, batch_mapping,
                           df_proteins['id'])

    db.commit()
Beispiel #3
0
def plates_first_batch(first_batch_list, outfolder):
    """Rebuild and export the plates of the first batch.

    Parses the first-batch list into plate descriptions plus a base
    template, swaps the prey symbols of the template using the batch 1
    proteins, deletes the Plate rows whose bait and prey batch are both 1,
    and then, for each parsed plate (numbered from 1): builds its template,
    stores a Plate row and its cells, and exports it.

    :param first_batch_list: input handed to parse_1st_batch_list().
    :param outfolder: destination handed to export_1st_batch_plate().
    """
    batch_id = 1

    metadata = OrderedDict([
        ('Notes', 'Generated from the original batch 1 template'),
        ('Batch 1 file', ''),
    ])

    plates, base_template = parse_1st_batch_list(first_batch_list)

    # Index the batch proteins by symbol
    prots_by_symbol = {}
    for protein in get_batch_proteins(batch_id):
        prots_by_symbol[protein.symbol] = protein

    replace_prey_symbols(base_template, prots_by_symbol)

    # Remove plates from db
    stale_plates = db.query(Plate).filter(Plate.bait_batch_id == batch_id,
                                          Plate.prey_batch_id == batch_id)
    stale_plates.delete()

    # Plate ids are 1-based positions in the parsed list.
    for plate_id, plate_info in enumerate(plates, start=1):
        first_bait = prots_by_symbol.get(plate_info['Bait_1'])
        second_bait = prots_by_symbol.get(plate_info['Bait_2'])

        template = add_baits(base_template, first_bait, second_bait)

        # Plate 4 of the first batch was put turned 180 in the reader
        if plate_info['filename'] == 'batch 4 SERK1 BAK1':
            template = reverse(template)

        plate_name = screen_plate_name(plate_id)
        metadata['Batch 1 file'] = plate_info['filename']
        metadata['Timeshift'] = plate_info['timepoint']

        # Save plate to database; the plate row is committed before its
        # cells are stored by save_template().
        db.add(Plate(id=plate_id,
                     bait_batch_id=batch_id,
                     prey_batch_id=batch_id))
        db.commit()
        save_template(plate_id, template)

        # TODO: export plate from database?
        export_1st_batch_plate(plate_name, template, metadata, outfolder)
Beispiel #4
0
def create_screen_plates(bait_batch_id, prey_batch_id, outfolder):
    """Create, export and store the screen plates for a bait/prey batch pair.

    The bait proteins of ``bait_batch_id`` are consumed in groups of
    ScreenPlate.capacity(); each group yields one Plate row and one
    exported template file named
    ``<plate name>_b<bait batch>_p<prey batch>_template.xlsx`` inside
    ``outfolder``. All screen plates share one prey storage plate built
    from the proteins of ``prey_batch_id``. The session is committed once
    per plate, after its template has been exported.

    :param bait_batch_id: batch providing the bait proteins (formatted as
        an integer in the file name).
    :param prey_batch_id: batch providing the prey proteins (formatted as
        an integer in the file name).
    :param outfolder: directory the template files are written to.
    """
    metadata = OrderedDict([
        ('Plate name', ''),
        ('Plate type', 'Screen'),
        ('Timeshift', ''),
        ('Bait plate', ''),
        ('Prey plate', storage_prey_plate_name(prey_batch_id)),
    ])

    # Same prey plate for all screen plates
    prey_plate = PreyStoragePlate(get_batch_proteins(prey_batch_id))
    prey_plate.name = storage_prey_plate_name(prey_batch_id)

    # One screen plate per group of ScreenPlate.capacity() baits
    # (original note: 2 bait proteins per plate)
    bait_prots = get_batch_proteins(bait_batch_id)
    for start in range(0, len(bait_prots), ScreenPlate.capacity()):
        db_plate = Plate(bait_batch_id=bait_batch_id,
                         prey_batch_id=prey_batch_id)
        db.add(db_plate)
        # Flushed before db_plate.id is read below (presumably so the
        # database assigns the id -- confirm against the model).
        db.flush()

        stop = start + ScreenPlate.capacity()
        plate_baits = bait_prots[start:stop]

        storage_index = BaitStoragePlate.bait_plate_index(start)
        bait_plate_name = storage_bait_plate_name(bait_batch_id,
                                                  storage_index)
        storage_offset = BaitStoragePlate.bait_plate_offset(start)
        screen_plate = ScreenPlate(plate_baits, bait_plate_name,
                                   storage_offset, prey_plate)
        screen_plate.name = screen_plate_name(db_plate.id)

        metadata['Plate name'] = screen_plate.name
        metadata['Bait plate'] = bait_plate_name
        metadata['Prey plate'] = screen_plate.prey_plate_name

        file_name = '{}_b{:02d}_p{:02d}_template.xlsx'.format(
            screen_plate.name, bait_batch_id, prey_batch_id)
        template_file = os.path.join(outfolder, file_name)
        export_screen_plate(template_file, screen_plate, metadata)
        logging.info("Saved screen template to {}".format(template_file))

        # Save to database
        db.commit()
Beispiel #5
0
def import_batch_sheet(filename, sheet, mapping, expected_ids):
    """Import one batch sheet, validate it and store it in the database.

    The sheet is read, ids are upper-cased and missing symbols are replaced
    by the id. The data is then checked for incomplete rows, malformed ids,
    duplicated ids and ids that are not part of the reference protein list;
    each check goes through assert_empty_df (defined elsewhere, presumably
    what rejects a non-empty frame). Afterwards the matching Protein rows
    are updated with subfamily, symbol and nickname (and committed), and
    one BatchProtein row per sheet row is bulk-inserted. No commit follows
    that final insert in this function; init_db() commits after importing
    all sheets.

    :param filename: workbook passed to read_excel_list().
    :param sheet: sheet name; its digits give the batch id (via
        filter_digits) and the text after the leading 'batch' prefix gives
        the batch name.
    :param mapping: column mapping passed to read_excel_list(); the
        resulting frame must provide '#', 'subfamily', 'id', 'symbol',
        'nickname' and 'cloned'.
    :param expected_ids: ids of the reference protein list, used with
        ``Series.isin`` to detect unknown proteins.
    """
    logging.info("Importing batch sheet '%s' from %s", sheet, filename)
    batch_id = filter_digits(sheet)
    batch_name = sheet[len('batch'):].strip()

    # Import batch proteins
    df_batch = read_excel_list(filename, sheet, mapping)
    df_batch['id'] = df_batch['id'].str.upper()
    # Rows without a symbol fall back to their id. The result is assigned
    # back instead of calling fillna(inplace=True) on the column selection:
    # that chained in-place form warns on recent pandas and does not modify
    # the frame at all under Copy-on-Write.
    df_batch['symbol'] = df_batch['symbol'].fillna(df_batch['id'])
    # TODO: remove those not cloned?
    logging.debug("Batch contains %d successfully cloned proteins",
                  len(df_batch))

    # Validate data
    required = ['subfamily', 'id', 'symbol', 'nickname']
    df_incomplete = df_batch[df_batch[required].isnull().any(axis=1)]
    assert_empty_df(
        df_incomplete,
        "Some of the proteins in sheet '{}' have incomplete information".
        format(sheet))

    df_invalid_id = df_batch[~df_batch['id'].str.contains(
        "^{}$".format(PROTEIN_ID_REGEX), regex=True, na=False)]
    assert_empty_df(
        df_invalid_id,
        "Some of the provided ids in sheet '{}' are invalid".format(sheet),
        ['#', 'id'])

    # keep=False marks every occurrence of a repeated id.
    df_duplicate_id = df_batch[df_batch.duplicated(subset='id', keep=False)]
    assert_empty_df(
        df_duplicate_id,
        "Some of the provided ids in sheet '{}' are duplicated".format(sheet),
        ['#', 'id'])

    df_unexpected_id = df_batch[~df_batch['id'].isin(expected_ids)]
    assert_empty_df(
        df_unexpected_id,
        "Some of the provided ids in sheet '{}' are not in the reference protein list"
        .format(sheet), ['#', 'id'])

    # Update the protein info with id, nickname, subfamily and symbol
    updated_fields = ('subfamily', 'symbol', 'nickname')
    for protein in df_batch.to_dict(orient='records'):
        db.query(Protein).\
            filter(Protein.id == protein['id']).\
            update({field: protein[field] for field in updated_fields})
    db.commit()

    # Import the batch info
    # TODO: order number? reindex?
    # Select the columns first and copy the selection, so the frame mutated
    # below is an independent object rather than a slice of a throwaway
    # full copy.
    df_batch_proteins = df_batch[['id', 'nickname', 'cloned']].copy()
    df_batch_proteins = df_batch_proteins.rename(
        columns={'id': 'protein_id'})
    df_batch_proteins['batch_name'] = batch_name
    df_batch_proteins['batch_id'] = batch_id
    # The frame index is stored as the protein's position within the batch.
    df_batch_proteins['order'] = df_batch_proteins.index
    batch_proteins = df_batch_proteins.to_dict(orient='records')
    db.execute(BatchProtein.__table__.insert(), batch_proteins)