Esempio n. 1
0
def update_ManualScoreCode_table(command, cursor):
    """
    Update ManualScoreCode according to the legacy table of the same name.

    We've made these decisions about migrating score codes and scores
        into the new database:

    - Scrap these antiquated codes, along with scores:
        -8: 2 degree pool (currently no scores; not used)
        -1: not sure (just Julie, whose scores are being deleted anyway)
        4: No Larvae (for a preliminary DevStaR test; no easy conversion)
        5: Larvae present ("")
        6: A lot of Larvae ("")

    - Scrap these antiquated codes, but convert scores:
        -6: Poor Image Quality (convert to -7)

    - Migrate these antiquated codes, but do not show in interface:
        -5: IA Error
    """
    recorded_score_codes = ManualScoreCode.objects.all()
    fields_to_compare = ('legacy_description', )

    legacy_query = ('SELECT code, definition FROM ManualScoreCode '
                    'WHERE code != -8 AND code != -1 AND code != 4 '
                    'AND code != 5 AND code != 6 AND code != -6')

    def sync_score_code_row(legacy_row):
        new_score_code = ManualScoreCode(
            id=legacy_row[0], legacy_description=legacy_row[1].decode('utf8'))

        return update_or_save_object(command, new_score_code,
                                     recorded_score_codes, fields_to_compare)

    sync_rows(command, cursor, legacy_query, sync_score_code_row)
Esempio n. 2
0
def update_ManualScore_table_secondary(command, cursor):
    recorded_scores = ManualScore.objects.all()
    fields_to_compare = None
    legacy_query = ('SELECT expID, ImgName, score, '
                    'scoreBy, scoreYMD, ScoreTime '
                    'FROM ScoreResultsManual '
                    'ORDER BY expID, ImgName')

    def sync_score_row(legacy_row):
        legacy_score_code = legacy_row[2]
        legacy_scorer = legacy_row[3]
        experiment = get_experiment(legacy_row[0], tile_to_well(legacy_row[1]))
        score_code = get_score_code(legacy_score_code)
        scorer = get_user(legacy_scorer)

        timestamp = get_timestamp_from_ymd(legacy_row[4], legacy_row[5])
        if not timestamp:
            raise CommandError(
                'ERROR: score of experiment {}, well {} '
                'could not be converted to a proper datetime'.format(
                    legacy_row[0], legacy_row[1]))

        new_score = ManualScore(experiment=experiment,
                                score_code=score_code,
                                scorer=scorer,
                                timestamp=timestamp)

        return update_or_save_object(command,
                                     new_score,
                                     recorded_scores,
                                     fields_to_compare,
                                     alternate_pk={
                                         'experiment': new_score.experiment,
                                         'score_code': new_score.score_code,
                                         'scorer': new_score.scorer,
                                         'timestamp': new_score.timestamp
                                     })

    sync_rows(command, cursor, legacy_query, sync_score_row)
Esempio n. 3
0
def update_Clone_table(command, cursor):
    """
    Update Clone with distinct clones from legacy table `RNAiPlate`.

    Find the distinct Ahringer clone names (in 'sjj_X' format) and
    the L4440 empty vector clone (just called 'L4440')
    from the clone field of RNAiPlate.

    Find the distinct Vidal clone names (in 'GHR-X@X' format)
    from the 384PlateID and 384Well fields of RNAiPlate
    (note that we are no longer using the 'mv_X'-style Vidal clone names,
    and our PK for Vidal clones will now be in 'GHR-X@X' style).
    """
    recorded_clones = Clone.objects.all()
    fields_to_compare = None

    legacy_query = ('SELECT DISTINCT clone FROM RNAiPlate '
                    'WHERE clone LIKE "sjj%" OR clone = "L4440"')

    def sync_clone_row(legacy_row):
        new_clone = Clone(id=legacy_row[0])
        return update_or_save_object(command, new_clone, recorded_clones,
                                     fields_to_compare)

    legacy_query_vidal = ('SELECT DISTINCT 384PlateID, 384Well FROM RNAiPlate '
                          'WHERE clone LIKE "mv%"')

    def sync_clone_row_vidal(legacy_row):
        vidal_clone_name = generate_vidal_clone_name(legacy_row[0],
                                                     legacy_row[1])
        new_clone = Clone(id=vidal_clone_name)
        return update_or_save_object(command, new_clone, recorded_clones,
                                     fields_to_compare)

    sync_rows(command, cursor, legacy_query, sync_clone_row)
    sync_rows(command, cursor, legacy_query_vidal, sync_clone_row_vidal)
Esempio n. 4
0
def update_ManualScore_table_secondary(command, cursor):
    recorded_scores = ManualScore.objects.all()
    fields_to_compare = None
    legacy_query = ('SELECT expID, ImgName, score, '
                    'scoreBy, scoreYMD, ScoreTime '
                    'FROM ScoreResultsManual '
                    'ORDER BY expID, ImgName')

    def sync_score_row(legacy_row):
        legacy_score_code = legacy_row[2]
        legacy_scorer = legacy_row[3]
        experiment = get_experiment(legacy_row[0], tile_to_well(legacy_row[1]))
        score_code = get_score_code(legacy_score_code)
        scorer = get_user(legacy_scorer)

        timestamp = get_timestamp_from_ymd(legacy_row[4], legacy_row[5])
        if not timestamp:
            raise CommandError(
                'ERROR: score of experiment {}, well {} '
                'could not be converted to a proper datetime'
                .format(legacy_row[0], legacy_row[1]))

        new_score = ManualScore(
            experiment=experiment,
            score_code=score_code,
            scorer=scorer,
            timestamp=timestamp)

        return update_or_save_object(
            command, new_score, recorded_scores, fields_to_compare,
            alternate_pk={'experiment': new_score.experiment,
                          'score_code': new_score.score_code,
                          'scorer': new_score.scorer,
                          'timestamp': new_score.timestamp})

    sync_rows(command, cursor, legacy_query, sync_score_row)
Esempio n. 5
0
def update_ManualScoreCode_table(command, cursor):
    """
    Update ManualScoreCode according to the legacy table of the same name.

    We've made these decisions about migrating score codes and scores
        into the new database:

    - Scrap these antiquated codes, along with scores:
        -8: 2 degree pool (currently no scores; not used)
        -1: not sure (just Julie, whose scores are being deleted anyway)
        4: No Larvae (for a preliminary DevStaR test; no easy conversion)
        5: Larvae present ("")
        6: A lot of Larvae ("")

    - Scrap these antiquated codes, but convert scores:
        -6: Poor Image Quality (convert to -7)

    - Migrate these antiquated codes, but do not show in interface:
        -5: IA Error
    """
    recorded_score_codes = ManualScoreCode.objects.all()
    fields_to_compare = ('legacy_description',)

    legacy_query = ('SELECT code, definition FROM ManualScoreCode '
                    'WHERE code != -8 AND code != -1 AND code != 4 '
                    'AND code != 5 AND code != 6 AND code != -6')

    def sync_score_code_row(legacy_row):
        new_score_code = ManualScoreCode(
            id=legacy_row[0],
            legacy_description=legacy_row[1].decode('utf8'))

        return update_or_save_object(
            command, new_score_code, recorded_score_codes, fields_to_compare)

    sync_rows(command, cursor, legacy_query, sync_score_code_row)
Esempio n. 6
0
def update_LibraryPlate_table(command, cursor):
    """
    Update LibraryPlate according to legacy tables.

    Uses legacy tables `RNAiPlate` and `CherryPickRNAiPlate`.

    Find original Ahringer 384-well plates through the chromosome and
    384PlateID fields of RNAiPlate (384PlateID NOT LIKE GHR-%, != 0).

    Find original Orfeome plates through the 384PlateID field of RNAiPlate
    (384PlateID LIKE GHR-%). Note that these original Orfeome plates
    are actually in 96-well format.

    Find the plates actually used in our primary experiments as the
    distinct RNAiPlateID from RNAiPlate, and the distinct RNAiPlateID
    LIKE Eliana% from ReArrayRNAiPlate.

    Find the plates actually used in our secondary experiments as the
    distinct RNAiPlateID from CherryPickRNAiPlates. Must also skip L4440
    (because in order to get around some bizarities in the old database
    separating the primary and secondary screens into two tables, we have
    the same L4440 plate listed in both tables in the legacy database).
    """
    recorded_plates = LibraryPlate.objects.all()
    fields_to_compare = ('screen_stage', 'number_of_wells')

    legacy_query_384_plates = ('SELECT DISTINCT chromosome, 384PlateID '
                               'FROM RNAiPlate '
                               'WHERE 384PlateID NOT LIKE "GHR-%" '
                               'AND 384PlateID != 0')

    legacy_query_orfeome_plates = ('SELECT DISTINCT 384PlateID '
                                   'FROM RNAiPlate '
                                   'WHERE 384PlateID LIKE "GHR-%"')

    legacy_query_l4440_plate = ('SELECT DISTINCT RNAiPlateID '
                                'FROM RNAiPlate '
                                'WHERE RNAiPlateID = "L4440"')

    legacy_query_primary_plates = ('SELECT DISTINCT RNAiPlateID '
                                   'FROM RNAiPlate '
                                   'WHERE RNAiPlateID != "L4440"')

    legacy_query_eliana_rearrays = ('SELECT DISTINCT RNAiPlateID FROM '
                                    'ReArrayRNAiPlate WHERE RNAiPlateID '
                                    'LIKE "Eliana%"')

    legacy_query_secondary_plates = ('SELECT DISTINCT RNAiPlateID '
                                     'FROM CherryPickRNAiPlate '
                                     'WHERE RNAiPlateID != "L4440"')

    def sync_library_plate_row(legacy_row, screen_stage=None,
                               number_of_wells=96):
        if len(legacy_row) > 1:
            plate_name = generate_ahringer_384_plate_name(legacy_row[0],
                                                          legacy_row[1])
        else:
            plate_name = generate_library_plate_name(legacy_row[0])

        new_plate = LibraryPlate(id=plate_name, screen_stage=screen_stage,
                                 number_of_wells=number_of_wells)

        return update_or_save_object(command, new_plate, recorded_plates,
                                     fields_to_compare)

    # Sync the 384-well Ahringer plates from which our 96-well Ahringer plates
    # were arrayed
    sync_rows(command, cursor, legacy_query_384_plates,
              sync_library_plate_row, screen_stage=0,
              number_of_wells=384)

    # Sync the 96-well Orfeome plates from which our 96-well Vidal rearrays
    # were cherry-picked
    sync_rows(command, cursor, legacy_query_orfeome_plates,
              sync_library_plate_row, screen_stage=0)

    # Sync the L4440 plate used in both Primary and Seconday experiments
    sync_rows(command, cursor, legacy_query_l4440_plate,
              sync_library_plate_row)

    # Sync the 96-well plates used in our Primary experiments (includes
    # Ahringer plates and Vidal plates)
    sync_rows(command, cursor, legacy_query_primary_plates,
              sync_library_plate_row, screen_stage=1)

    # Sync the 96-well "Eliana Rearray" plates, which tried to salvage wells
    # that did not grow consistently in the other primary screen plates
    sync_rows(command, cursor, legacy_query_eliana_rearrays,
              sync_library_plate_row, screen_stage=1)

    # Sync the 96-well plates used for our Secondary experiments
    sync_rows(command, cursor, legacy_query_secondary_plates,
              sync_library_plate_row, screen_stage=2)
Esempio n. 7
0
def update_LibraryStock_table(command, cursor):
    """
    Update LibraryStock according to legacy tables.

    This information comes from a variety of queries of
    primarily according to legacy tables RNAiPlate and CherryPickRNAiPlate.
    Detailed comments inline.
    """
    recorded_wells = LibraryStock.objects.all()
    fields_to_compare = ('plate', 'well', 'parent_stock',
                         'intended_clone')

    # 'Source' plates are Ahringer 384 plates and original Orfeome
    # plates (e.g. GHR-10001). Plate names are captured as they
    # were in update_LibraryPlate_table (i.e., using fields 384PlateID and
    # chromosome). Well is captured in field 384Well. Clone is captured
    # as described in update_Clone_table (i.e., using clone field for
    # sjj clones, and source plate@well for mv clones).
    legacy_query_source = ('SELECT DISTINCT 384PlateID, 384Well, '
                           'chromosome, clone FROM RNAiPlate '
                           'WHERE clone LIKE "sjj%" OR clone LIKE "mv%"')

    def sync_source_row(legacy_row):
        plate_name = legacy_row[0]
        clone_name = legacy_row[3]

        if re.match('sjj', clone_name):
            chromosome = legacy_row[2]
            plate_name = generate_ahringer_384_plate_name(
                chromosome, plate_name)

        well_improper = legacy_row[1]
        well_proper = get_three_character_well(well_improper)

        if re.match('mv', clone_name):
            clone_name = generate_vidal_clone_name(plate_name,
                                                   well_improper)

        new_well = LibraryStock(
            id=generate_library_stock_name(plate_name, well_proper),
            plate=get_library_plate(plate_name), well=well_proper,
            parent_stock=None, intended_clone=get_clone(clone_name))

        return update_or_save_object(command, new_well, recorded_wells,
                                     fields_to_compare)

    # Primary well layout captured in RNAiPlate table (fields RNAiPlateID
    # and 96well). Clone is determined the same way as described in
    # update_Clone_table (i.e., clone column for sjj clones, and source
    # plate@well for mv clones). No parent for L4440 wells. Parent for other
    # plates determined using fields 384PlateID, chromosome, and 384Well.
    legacy_query_primary = ('SELECT RNAiPlateID, 96well, clone, '
                            'chromosome, 384PlateID, 384Well '
                            'FROM RNAiPlate')

    def sync_primary_row(legacy_row):
        plate_name = legacy_row[0]
        well = get_three_character_well(legacy_row[1])
        clone_name = legacy_row[2]
        parent_plate_name = legacy_row[4]

        if re.match('sjj', clone_name):
            parent_chromosome = legacy_row[3]
            parent_plate_name = generate_ahringer_384_plate_name(
                parent_chromosome, parent_plate_name)
        parent_well_improper = legacy_row[5]
        if re.match('mv', clone_name):
            clone_name = generate_vidal_clone_name(parent_plate_name,
                                                   parent_well_improper)

        intended_clone = get_clone(clone_name)

        if re.match('L4440', clone_name):
            parent_stock = None

        else:
            parent_well_proper = get_three_character_well(parent_well_improper)
            parent_stock = get_library_stock(parent_plate_name,
                                             parent_well_proper)

            # Confirm that this intended clone matches parent's clone
            if parent_stock.intended_clone != intended_clone:
                raise CommandError('Clone {} does not match parent\n'
                                   .format(clone_name))

        new_well = LibraryStock(
            id=generate_library_stock_name(plate_name, well),
            plate=get_library_plate(plate_name), well=well,
            parent_stock=parent_stock,
            intended_clone=intended_clone)

        return update_or_save_object(command, new_well, recorded_wells,
                                     fields_to_compare)

    # L4440 wells from secondary screen are treated specially (since
    # the complicated join used to resolve parents below complicates things
    # for L4440). L4440 wells have no recorded parent.
    legacy_query_secondary_L4440 = ('SELECT RNAiPlateID, 96well '
                                    'FROM CherryPickRNAiPlate '
                                    'WHERE clone = "L4440"')

    def sync_secondary_L4440_row(legacy_row):
        plate_name = legacy_row[0]
        well = get_three_character_well(legacy_row[1])

        new_well = LibraryStock(
            id=generate_library_stock_name(plate_name, well),
            plate=get_library_plate(plate_name), well=well,
            parent_stock=None, intended_clone=get_clone('L4440'))

        return update_or_save_object(command, new_well, recorded_wells,
                                     fields_to_compare)

    # Secondary well layout is captured in CherryPickRNAiPlate table (fields
    # RNAiPlate and 96well). However, there are no columns in this table
    # for parent. CherryPickTemplate captures some parent relationships,
    # but not all. Therefore, we rely on CherryPickTemplate where available
    # to define parent relationship. Otherwise, we guess based on clone name
    # (which almost always uniquely defines the source well). In cases not
    # in CherryPickTemplate and where there is ambiguity, leave the parent
    # undefined (we need go back through physical notes to resolve these).
    legacy_query_secondary = (
        'SELECT C.RNAiPlateID as plate, C.96well as well, C.clone, '
        'T.RNAiPlateID as definite_parent_plate, '
        'T.96well as definite_parent_well, '
        'R.RNAiPlateID as likely_parent_plate, '
        'R.96well as likely_parent_well, '
        'R.clone as likely_parent_clone '
        'FROM CherryPickRNAiPlate AS C '
        'LEFT JOIN CherryPickTemplate AS T '
        'ON finalRNAiPlateID = C.RNAiPlateID AND final96well = C.96well '
        'LEFT JOIN RNAiPlate AS R ON C.clone=R.clone AND '
        '(T.RNAiPlateID IS NULL OR '
        '(T.RNAiPlateID=R.RNAiPlateID AND T.96well=R.96well)) '
        'WHERE C.clone != "L4440" '
        'ORDER BY C.RNAiPlateID, C.96well')

    def sync_secondary_row(legacy_row):
        plate_name = legacy_row[0]
        well = get_three_character_well(legacy_row[1])
        clone_name = legacy_row[2]

        definite_parent_plate_name = legacy_row[3]
        definite_parent_well = legacy_row[4]
        if definite_parent_well:
            definite_parent_well = get_three_character_well(
                definite_parent_well)

        likely_parent_plate_name = legacy_row[5]
        likely_parent_well = legacy_row[6]
        if likely_parent_well:
            likely_parent_well = get_three_character_well(likely_parent_well)

        likely_parent_clone_name = legacy_row[7]

        if (definite_parent_plate_name and likely_parent_plate_name and
                definite_parent_plate_name != likely_parent_plate_name):
            raise CommandError(
                'ERROR: definite and likely parent plates disagree '
                'for {} {}\n'.format(plate_name, well))

        if (definite_parent_well and likely_parent_well and
                definite_parent_well != likely_parent_well):
            raise CommandError(
                'ERROR: definite and likely parent wells disagree '
                'for {} {}\n'.format(plate_name, well))

        try:
            if definite_parent_plate_name and definite_parent_well:
                parent_stock = get_library_stock(definite_parent_plate_name,
                                                 definite_parent_well)

            else:
                parent_stock = get_library_stock(likely_parent_plate_name,
                                                 likely_parent_well)

            intended_clone = parent_stock.intended_clone

        except ObjectDoesNotExist:
            command.stderr.write(
                'WARNING for LibraryStock {} {}: parent not '
                'found in LibraryStock\n'.format(plate_name, well))

            parent_stock = None
            intended_clone = None

        if clone_name and (clone_name != likely_parent_clone_name):
            command.stderr.write(
                'WARNING for LibraryStock {} {}: clone recorded '
                'in CherryPickRNAiPlate is inconsistent with '
                'CherryPickTemplate source/destination records\n'
                .format(plate_name, well))

        if re.match('sjj', clone_name):
            try:
                recorded_clone = get_clone(clone_name)
                if recorded_clone != intended_clone:
                    command.stderr.write(
                        'WARNING for LibraryStock {} {}: clone recorded '
                        'in CherryPickRNAiPlate does not match its '
                        'parent\'s clone\n'.format(plate_name, well))

            except ObjectDoesNotExist:
                command.stderr.write(
                    'WARNING for LibraryStock {} {}: clone recorded in '
                    'CherryPickRNAiPlate not found at all in RNAiPlate\n'
                    .format(plate_name, well))

        new_well = LibraryStock(
            id=generate_library_stock_name(plate_name, well),
            plate=get_library_plate(plate_name), well=well,
            parent_stock=parent_stock,
            intended_clone=intended_clone)

        return update_or_save_object(command, new_well, recorded_wells,
                                     fields_to_compare)

    legacy_query_eliana = (
        'SELECT RNAiPlateID, 96well, OldPlateID, OldWellPosition '
        'FROM ReArrayRNAiPlate WHERE RNAiPlateID LIKE "Eliana%"')

    def sync_eliana_row(legacy_row):
        plate_name = legacy_row[0]
        well = get_three_character_well(legacy_row[1])

        if legacy_row[2]:
            try:
                parent_stock = get_library_stock(legacy_row[2], legacy_row[3])
                intended_clone = parent_stock.intended_clone
            except ObjectDoesNotExist:
                parent_stock = None
                intended_clone = None

        else:
            parent_stock = None
            intended_clone = None

        new_well = LibraryStock(
            id=generate_library_stock_name(plate_name, well),
            plate=get_library_plate(plate_name), well=well,
            parent_stock=parent_stock,
            intended_clone=intended_clone)

        return update_or_save_object(command, new_well, recorded_wells,
                                     fields_to_compare)

    sync_rows(command, cursor, legacy_query_source, sync_source_row)
    sync_rows(command, cursor, legacy_query_primary, sync_primary_row)
    sync_rows(command, cursor, legacy_query_eliana, sync_eliana_row)
    sync_rows(command, cursor, legacy_query_secondary_L4440,
              sync_secondary_L4440_row)
    sync_rows(command, cursor, legacy_query_secondary, sync_secondary_row)
Esempio n. 8
0
def update_ManualScore_table_primary(command, cursor):
    """
    Update ManualScore according to the legacy table of the same name.

    Requires that ScoreYear, ScoreMonth, ScoreDate, and ScoreTime
    are valid fields for a python datetime.datetime.

    As described in update_ManualScoreCode_table, do not migrate scores
    with score code -8, -1, 4, 5, or 6.

    Also as described in update_ManualScoreCode_table, convert
    any -6 scores to -7 during migration.

    In addition, we've made these decisions about score migration:
        - *?*?* do not migrate isJunk = -1 scores, as we are probably trashing
          these experiments entirely *?*?*

        - scrap scorer Julie, treating her scores as follows:
            no bacteria (-2) scores: mark as scored by hueyling (hueyling did
                    populate these scores automatically, and Julie was simply
                    listed as the default scorer in the old database
            all other scores: do not migrate (all correspond to misbehaving
                    spn-4 line and have no bearing on our results)

        - scrap scorer expPeople, treating scores as hueyling

        - scrap alejandro and katy as scorers, not migrating any of their
          scores (all their scores were redundant; alejandro was not trained
          well, and katy's were done prior to other decisions)

        - scrap ENH scores only by eliana and lara

        - keep ENH scores by sherly, kelly, and patricia, but do not have
          these show in the interface

        - for sherly and patricia's ENH scores, ensure that any medium or
          strong enhancers were caught by official scorers
    """
    recorded_scores = ManualScore.objects.all()
    fields_to_compare = None

    legacy_query = ('SELECT ManualScore.expID, ImgName, score, scoreBy, '
                    'scoreYMD, ScoreYear, ScoreMonth, ScoreDate, '
                    'ScoreTime, mutant, screenFor '
                    'FROM ManualScore '
                    'LEFT JOIN RawData '
                    'ON ManualScore.expID = RawData.expID '
                    'WHERE score != -8 AND score != -1 AND score != 4 '
                    'AND score != 5 AND score != 6 '
                    'ORDER BY ManualScore.expID, ImgName')

    def sync_score_row(legacy_row):
        legacy_score_code = legacy_row[2]
        legacy_scorer = legacy_row[3]
        gene = legacy_row[9]
        screen = legacy_row[10]

        # Skip some scores entirely (see criteria above)
        skip_entirely = ('alejandro', 'katy')
        skip_ENH = ('eliana', 'lara')

        if ((legacy_scorer == 'Julie' and legacy_score_code != -2) or
                (legacy_scorer in skip_entirely) or
                (screen == 'ENH' and legacy_scorer in skip_ENH)):
            command.stderr.write(
                'FYI: Skipping a {} {} score of {} by {}\n'
                .format(gene, screen, legacy_score_code, legacy_scorer))

            return True

        # Convert some scorers to hueyling (see criteria above)
        if legacy_scorer == 'Julie' or legacy_scorer == 'expPeople':
            command.stderr.write(
                'FYI: Converting a {} score by {} to hueyling\n'
                .format(legacy_score_code, legacy_scorer))

            legacy_scorer = 'hueyling'

        # Convert some score codes (see criteria above)
        if legacy_score_code == -6:
            command.stderr.write('FYI: Converting score from -6 to -7\n')
            legacy_score_code = -7

        # Following raise exceptions if improperly formatted or not found
        experiment = get_experiment(legacy_row[0], tile_to_well(legacy_row[1]))
        score_code = get_score_code(legacy_score_code)
        scorer = get_user(legacy_scorer)

        timestamp = get_timestamp(legacy_row[5], legacy_row[6], legacy_row[7],
                                  legacy_row[8], legacy_row[4])
        if not timestamp:
            raise CommandError(
                'ERROR: score of {} could  not be converted to a proper '
                'datetime'.format(experiment))

        new_score = ManualScore(
            experiment=experiment,
            score_code=score_code,
            scorer=scorer,
            timestamp=timestamp)

        return update_or_save_object(
            command, new_score, recorded_scores, fields_to_compare,
            alternate_pk={'experiment': new_score.experiment,
                          'score_code': new_score.score_code,
                          'scorer': new_score.scorer,
                          'timestamp': new_score.timestamp})

    sync_rows(command, cursor, legacy_query, sync_score_row)
Esempio n. 9
0
def update_Experiment_tables(command, cursor):
    """
    Update ExperimentPlate and Experiment according to legacy table `RawData`.

    Several datatype transforms occur from the old to the new schema:

        - recordDate string becomes a DATE
        - temperature string (suffixed 'C') becomes a decimal
        - plate-level mutant/mutantAllele becomes well-level FK to WormStrain
        - plate-level RNAiPlateID becomes becomes well-level pointers to
          LibraryStocks
        - plate-level isJunk with three values (-1, 0, 1) becomes a boolean
          (with both 1 and -1 becoming 1), and and becomes well-level

    Also, experiments of Julie's (which were done with a line of spn-4 worms
    later deemed untrustworthy) are excluded.
    """
    recorded_plates = ExperimentPlate.objects.all()
    recorded_wells = Experiment.objects.all()

    plate_fields_to_compare = ('screen_stage', 'temperature', 'date',
                               'comment')

    well_fields_to_compare = ('plate', 'well', 'worm_strain',
                              'library_stock', 'is_junk')

    legacy_query = ('SELECT expID, mutant, mutantAllele, RNAiPlateID, '
                    'CAST(SUBSTRING_INDEX(temperature, "C", 1) '
                    'AS DECIMAL(3,1)), '
                    'CAST(recordDate AS DATE), ABS(isJunk), comment '
                    'FROM RawData '
                    'WHERE (expID < 40000 OR expID >= 50000) '
                    'AND RNAiPlateID NOT LIKE "Julie%" '
                    'ORDER BY expID')

    def sync_experiment_row(legacy_row):
        experiment_plate_id = legacy_row[0]
        worm_strain = get_worm_strain(legacy_row[1], legacy_row[2])
        legacy_library_plate_name = legacy_row[3]
        temperature = legacy_row[4]
        date = legacy_row[5]
        is_junk = legacy_row[6]
        comment = legacy_row[7]

        all_match = True

        if experiment_plate_id < 40000:
            screen_stage = 1
        else:
            screen_stage = 2

        new_plate = ExperimentPlate(
            id=experiment_plate_id,
            screen_stage=screen_stage,
            temperature=temperature,
            date=date,
            comment=comment)

        all_match &= update_or_save_object(
            command, new_plate, recorded_plates, plate_fields_to_compare)

        experiment_plate = get_experiment_plate(experiment_plate_id)

        for well in get_well_list():
            new_well = Experiment(
                id=generate_experiment_id(experiment_plate_id, well),
                plate=experiment_plate, well=well,
                worm_strain=worm_strain,
                library_stock=get_library_stock(
                    legacy_library_plate_name, well),
                is_junk=is_junk)

            all_match &= update_or_save_object(
                command, new_well, recorded_wells,
                well_fields_to_compare)

        return all_match

    sync_rows(command, cursor, legacy_query, sync_experiment_row)
Esempio n. 10
0
def update_DevstarScore_table(command, cursor):
    """
    Update DevstarScore according to legacy table `RawDataWithScore`.

    Redundant fields (mutantAllele, targetRNAiClone, RNAiPlateID) are
    excluded.

    The DevStaR output is simply 6 fields (adult/larva/embryo area,
    adult/larva count, and whether bacteria is present).
    Several other fields are simply calculations on these fields
    (embryo per adult, larva per adult, survival, lethality).
    I redo these calculations, but compare to the legacy value to make sure
    it is equal or almost equal. We also have some datatype conversions:
        - embryo/larva per adult become Float
        - survival and lethality become higher precision Floats
        - counts of -1 become NULL (this happens when part of the DevStaR
          program did not run)
        - division by 0 in embryo/larva per adult remain 0, but when
          adult=0 we DO now calculate survival and lethality
        - machineCall becomes a Boolean
    """
    recorded_scores = DevstarScore.objects.all()

    fields_to_compare = ('area_adult', 'area_larva', 'area_embryo',
                         'count_adult', 'count_larva', 'is_bacteria_present',
                         'count_embryo', 'larva_per_adult',
                         'embryo_per_adult', 'survival', 'lethality',
                         'selected_for_scoring', 'gi_score_larva_per_adult',
                         'gi_score_survival')

    legacy_query = ('SELECT expID, 96well, '
                    'mutantAllele, targetRNAiClone, RNAiPlateID, '
                    'areaWorm, areaLarvae, areaEmbryo, '
                    'AdultCount, LarvaeCount, EggCount, '
                    'EggPerWorm, LarvaePerWorm, survival, lethality, '
                    'machineCall, machineDetectBac, '
                    'GIscoreLarvaePerWorm, GIscoreSurvival '
                    'FROM RawDataWithScore '
                    'WHERE (expID < 40000 OR expID >= 50000) '
                    'AND RNAiPlateID NOT LIKE "Julie%" '
                    'ORDER BY expID, 96well')

    def sync_score_row(legacy_row):
        # Build the object using the minimimum fields
        count_adult = legacy_row[8]
        count_larva = legacy_row[9]
        if count_adult == -1:
            count_adult = None
        if count_larva == -1:
            count_larva = None

        machine_call = legacy_row[15]
        if machine_call:
            selected_for_scoring = True
        else:
            selected_for_scoring = False

        new_score = DevstarScore(
            experiment=get_experiment(legacy_row[0], legacy_row[1]),
            area_adult=legacy_row[5],
            area_larva=legacy_row[6],
            area_embryo=legacy_row[7],
            count_adult=count_adult,
            count_larva=count_larva,
            is_bacteria_present=legacy_row[16],
            selected_for_scoring=selected_for_scoring,
            gi_score_larva_per_adult=legacy_row[17],
            gi_score_survival=legacy_row[18]
        )

        # Clean the object to populate the fields derived from other fields
        new_score.clean()

        ###############
        # SANITY CHECKS
        ###############

        errors = []

        new_allele = new_score.experiment.worm_strain.allele
        if (new_allele != legacy_row[2]):
            # Deal with case of legacy database using zc310 instead of zu310
            if (legacy_row[2] == 'zc310' and
                    new_allele == 'zu310'):
                pass

            # Deal with some case sensitivity issues in legacy database
            # (e.g. see experiment 32405, where the allele is capitalized).
            # Can't do lower() in all cases because "N2" should always be
            # capitalized.
            elif new_allele == legacy_row[2].lower():
                pass

            else:
                errors.append('allele mismatch')

        # Deal with case of some experiments having the wrong RNAiPlateID
        # in the legacy database's RawDataWithScore table. This field is
        # redundant with the RawData table, and RawData is more trustworthy;
        # however it is still worthwhile to perform this check in order
        # to find the mismatches, and to confirm manually that each one
        # makes sense.
        new_lp = new_score.experiment.library_stock.plate_id
        legacy_lp = legacy_row[4].replace('_', '-').replace('zc310', 'zu310')
        if (legacy_lp != new_lp and
                new_score.experiment.plate_id not in (461, 8345) and
                ('vidal-' not in new_lp or
                    legacy_lp != new_lp.split('vidal-')[1])):
            errors.append('RNAi plate mismatch: {} {}')

        if new_score.count_embryo != legacy_row[10]:
            errors.append('embryo count mismatch')

        if (new_score.embryo_per_adult and legacy_row[8] and
                legacy_row[8] != -1 and
                int(new_score.embryo_per_adult) != legacy_row[11]):
            errors.append('embryo per adult mismatch')

        if (new_score.larva_per_adult and legacy_row[9] and
                legacy_row[9] != -1 and
                int(new_score.larva_per_adult) != legacy_row[12]):
            errors.append('larva per adult mismatch')

        if (new_score.survival and not compare_floats_for_equality(
                new_score.survival, legacy_row[13]) and
                legacy_row[13] != 0):
            errors.append('invalid survival')

        if (new_score.lethality and not compare_floats_for_equality(
                new_score.lethality, legacy_row[14]) and
                legacy_row[13] != 0):
            errors.append('invalid lethality')

        if errors:
            raise CommandError(
                'DevstarScore for {}:{} had these errors: {}'
                .format(legacy_row[0], legacy_row[1], errors))

        return update_or_save_object(
            command, new_score, recorded_scores, fields_to_compare,
            alternate_pk={'experiment': new_score.experiment})

    sync_rows(command, cursor, legacy_query, sync_score_row)
Esempio n. 11
0
def update_ManualScore_table_primary(command, cursor):
    """
    Update ManualScore according to the legacy table of the same name.

    Requires that ScoreYear, ScoreMonth, ScoreDate, and ScoreTime
    are valid fields for a python datetime.datetime.

    As described in update_ManualScoreCode_table, do not migrate scores
    with score code -8, -1, 4, 5, or 6.

    Also as described in update_ManualScoreCode_table, convert
    any -6 scores to -7 during migration.

    In addition, we've made these decisions about score migration:
        - *?*?* do not migrate isJunk = -1 scores, as we are probably trashing
          these experiments entirely *?*?*

        - scrap scorer Julie, treating her scores as follows:
            no bacteria (-2) scores: mark as scored by hueyling (hueyling did
                    populate these scores automatically, and Julie was simply
                    listed as the default scorer in the old database
            all other scores: do not migrate (all correspond to misbehaving
                    spn-4 line and have no bearing on our results)

        - scrap scorer expPeople, treating scores as hueyling

        - scrap alejandro and katy as scorers, not migrating any of their
          scores (all their scores were redundant; alejandro was not trained
          well, and katy's were done prior to other decisions)

        - scrap ENH scores only by eliana and lara

        - keep ENH scores by sherly, kelly, and patricia, but do not have
          these show in the interface

        - for sherly and patricia's ENH scores, ensure that any medium or
          strong enhancers were caught by official scorers
    """
    recorded_scores = ManualScore.objects.all()
    fields_to_compare = None

    legacy_query = ('SELECT ManualScore.expID, ImgName, score, scoreBy, '
                    'scoreYMD, ScoreYear, ScoreMonth, ScoreDate, '
                    'ScoreTime, mutant, screenFor '
                    'FROM ManualScore '
                    'LEFT JOIN RawData '
                    'ON ManualScore.expID = RawData.expID '
                    'WHERE score != -8 AND score != -1 AND score != 4 '
                    'AND score != 5 AND score != 6 '
                    'ORDER BY ManualScore.expID, ImgName')

    def sync_score_row(legacy_row):
        legacy_score_code = legacy_row[2]
        legacy_scorer = legacy_row[3]
        gene = legacy_row[9]
        screen = legacy_row[10]

        # Skip some scores entirely (see criteria above)
        skip_entirely = ('alejandro', 'katy')
        skip_ENH = ('eliana', 'lara')

        if ((legacy_scorer == 'Julie' and legacy_score_code != -2)
                or (legacy_scorer in skip_entirely)
                or (screen == 'ENH' and legacy_scorer in skip_ENH)):
            command.stderr.write(
                'FYI: Skipping a {} {} score of {} by {}\n'.format(
                    gene, screen, legacy_score_code, legacy_scorer))

            return True

        # Convert some scorers to hueyling (see criteria above)
        if legacy_scorer == 'Julie' or legacy_scorer == 'expPeople':
            command.stderr.write(
                'FYI: Converting a {} score by {} to hueyling\n'.format(
                    legacy_score_code, legacy_scorer))

            legacy_scorer = 'hueyling'

        # Convert some score codes (see criteria above)
        if legacy_score_code == -6:
            command.stderr.write('FYI: Converting score from -6 to -7\n')
            legacy_score_code = -7

        # Following raise exceptions if improperly formatted or not found
        experiment = get_experiment(legacy_row[0], tile_to_well(legacy_row[1]))
        score_code = get_score_code(legacy_score_code)
        scorer = get_user(legacy_scorer)

        timestamp = get_timestamp(legacy_row[5], legacy_row[6], legacy_row[7],
                                  legacy_row[8], legacy_row[4])
        if not timestamp:
            raise CommandError(
                'ERROR: score of {} could  not be converted to a proper '
                'datetime'.format(experiment))

        new_score = ManualScore(experiment=experiment,
                                score_code=score_code,
                                scorer=scorer,
                                timestamp=timestamp)

        return update_or_save_object(command,
                                     new_score,
                                     recorded_scores,
                                     fields_to_compare,
                                     alternate_pk={
                                         'experiment': new_score.experiment,
                                         'score_code': new_score.score_code,
                                         'scorer': new_score.scorer,
                                         'timestamp': new_score.timestamp
                                     })

    sync_rows(command, cursor, legacy_query, sync_score_row)
Esempio n. 12
0
def update_Experiment_tables(command, cursor):
    """
    Update ExperimentPlate and Experiment according to legacy table `RawData`.

    Several datatype transforms occur from the old to the new schema:

        - recordDate string becomes a DATE
        - temperature string (suffixed 'C') becomes a decimal
        - plate-level mutant/mutantAllele becomes well-level FK to WormStrain
        - plate-level RNAiPlateID becomes becomes well-level pointers to
          LibraryStocks
        - plate-level isJunk with three values (-1, 0, 1) becomes a boolean
          (with both 1 and -1 becoming 1), and and becomes well-level

    Also, experiments of Julie's (which were done with a line of spn-4 worms
    later deemed untrustworthy) are excluded.
    """
    recorded_plates = ExperimentPlate.objects.all()
    recorded_wells = Experiment.objects.all()

    plate_fields_to_compare = ('screen_stage', 'temperature', 'date',
                               'comment')

    well_fields_to_compare = ('plate', 'well', 'worm_strain', 'library_stock',
                              'is_junk')

    legacy_query = ('SELECT expID, mutant, mutantAllele, RNAiPlateID, '
                    'CAST(SUBSTRING_INDEX(temperature, "C", 1) '
                    'AS DECIMAL(3,1)), '
                    'CAST(recordDate AS DATE), ABS(isJunk), comment '
                    'FROM RawData '
                    'WHERE (expID < 40000 OR expID >= 50000) '
                    'AND RNAiPlateID NOT LIKE "Julie%" '
                    'ORDER BY expID')

    def sync_experiment_row(legacy_row):
        experiment_plate_id = legacy_row[0]
        worm_strain = get_worm_strain(legacy_row[1], legacy_row[2])
        legacy_library_plate_name = legacy_row[3]
        temperature = legacy_row[4]
        date = legacy_row[5]
        is_junk = legacy_row[6]
        comment = legacy_row[7]

        all_match = True

        if experiment_plate_id < 40000:
            screen_stage = 1
        else:
            screen_stage = 2

        new_plate = ExperimentPlate(id=experiment_plate_id,
                                    screen_stage=screen_stage,
                                    temperature=temperature,
                                    date=date,
                                    comment=comment)

        all_match &= update_or_save_object(command, new_plate, recorded_plates,
                                           plate_fields_to_compare)

        experiment_plate = get_experiment_plate(experiment_plate_id)

        for well in get_well_list():
            new_well = Experiment(
                id=generate_experiment_id(experiment_plate_id, well),
                plate=experiment_plate,
                well=well,
                worm_strain=worm_strain,
                library_stock=get_library_stock(legacy_library_plate_name,
                                                well),
                is_junk=is_junk)

            all_match &= update_or_save_object(command, new_well,
                                               recorded_wells,
                                               well_fields_to_compare)

        return all_match

    sync_rows(command, cursor, legacy_query, sync_experiment_row)
Esempio n. 13
0
def update_DevstarScore_table(command, cursor):
    """
    Update DevstarScore according to legacy table `RawDataWithScore`.

    Redundant fields (mutantAllele, targetRNAiClone, RNAiPlateID) are
    excluded.

    The DevStaR output is simply 6 fields (adult/larva/embryo area,
    adult/larva count, and whether bacteria is present).
    Several other fields are simply calculations on these fields
    (embryo per adult, larva per adult, survival, lethality).
    I redo these calculations, but compare to the legacy value to make sure
    it is equal or almost equal. We also have some datatype conversions:
        - embryo/larva per adult become Float
        - survival and lethality become higher precision Floats
        - counts of -1 become NULL (this happens when part of the DevStaR
          program did not run)
        - division by 0 in embryo/larva per adult remain 0, but when
          adult=0 we DO now calculate survival and lethality
        - machineCall becomes a Boolean
    """
    recorded_scores = DevstarScore.objects.all()

    fields_to_compare = ('area_adult', 'area_larva', 'area_embryo',
                         'count_adult', 'count_larva', 'is_bacteria_present',
                         'count_embryo', 'larva_per_adult', 'embryo_per_adult',
                         'survival', 'lethality', 'selected_for_scoring',
                         'gi_score_larva_per_adult', 'gi_score_survival')

    legacy_query = ('SELECT expID, 96well, '
                    'mutantAllele, targetRNAiClone, RNAiPlateID, '
                    'areaWorm, areaLarvae, areaEmbryo, '
                    'AdultCount, LarvaeCount, EggCount, '
                    'EggPerWorm, LarvaePerWorm, survival, lethality, '
                    'machineCall, machineDetectBac, '
                    'GIscoreLarvaePerWorm, GIscoreSurvival '
                    'FROM RawDataWithScore '
                    'WHERE (expID < 40000 OR expID >= 50000) '
                    'AND RNAiPlateID NOT LIKE "Julie%" '
                    'ORDER BY expID, 96well')

    def sync_score_row(legacy_row):
        # Build the object using the minimimum fields
        count_adult = legacy_row[8]
        count_larva = legacy_row[9]
        if count_adult == -1:
            count_adult = None
        if count_larva == -1:
            count_larva = None

        machine_call = legacy_row[15]
        if machine_call:
            selected_for_scoring = True
        else:
            selected_for_scoring = False

        new_score = DevstarScore(experiment=get_experiment(
            legacy_row[0], legacy_row[1]),
                                 area_adult=legacy_row[5],
                                 area_larva=legacy_row[6],
                                 area_embryo=legacy_row[7],
                                 count_adult=count_adult,
                                 count_larva=count_larva,
                                 is_bacteria_present=legacy_row[16],
                                 selected_for_scoring=selected_for_scoring,
                                 gi_score_larva_per_adult=legacy_row[17],
                                 gi_score_survival=legacy_row[18])

        # Clean the object to populate the fields derived from other fields
        new_score.clean()

        ###############
        # SANITY CHECKS
        ###############

        errors = []

        new_allele = new_score.experiment.worm_strain.allele
        if (new_allele != legacy_row[2]):
            # Deal with case of legacy database using zc310 instead of zu310
            if (legacy_row[2] == 'zc310' and new_allele == 'zu310'):
                pass

            # Deal with some case sensitivity issues in legacy database
            # (e.g. see experiment 32405, where the allele is capitalized).
            # Can't do lower() in all cases because "N2" should always be
            # capitalized.
            elif new_allele == legacy_row[2].lower():
                pass

            else:
                errors.append('allele mismatch')

        # Deal with case of some experiments having the wrong RNAiPlateID
        # in the legacy database's RawDataWithScore table. This field is
        # redundant with the RawData table, and RawData is more trustworthy;
        # however it is still worthwhile to perform this check in order
        # to find the mismatches, and to confirm manually that each one
        # makes sense.
        new_lp = new_score.experiment.library_stock.plate_id
        legacy_lp = legacy_row[4].replace('_', '-').replace('zc310', 'zu310')
        if (legacy_lp != new_lp
                and new_score.experiment.plate_id not in (461, 8345)
                and ('vidal-' not in new_lp
                     or legacy_lp != new_lp.split('vidal-')[1])):
            errors.append('RNAi plate mismatch: {} {}')

        if new_score.count_embryo != legacy_row[10]:
            errors.append('embryo count mismatch')

        if (new_score.embryo_per_adult and legacy_row[8]
                and legacy_row[8] != -1
                and int(new_score.embryo_per_adult) != legacy_row[11]):
            errors.append('embryo per adult mismatch')

        if (new_score.larva_per_adult and legacy_row[9] and legacy_row[9] != -1
                and int(new_score.larva_per_adult) != legacy_row[12]):
            errors.append('larva per adult mismatch')

        if (new_score.survival and not compare_floats_for_equality(
                new_score.survival, legacy_row[13]) and legacy_row[13] != 0):
            errors.append('invalid survival')

        if (new_score.lethality and not compare_floats_for_equality(
                new_score.lethality, legacy_row[14]) and legacy_row[13] != 0):
            errors.append('invalid lethality')

        if errors:
            raise CommandError(
                'DevstarScore for {}:{} had these errors: {}'.format(
                    legacy_row[0], legacy_row[1], errors))

        return update_or_save_object(
            command,
            new_score,
            recorded_scores,
            fields_to_compare,
            alternate_pk={'experiment': new_score.experiment})

    sync_rows(command, cursor, legacy_query, sync_score_row)