def test_find_file_by_search_terms(
        gdrive_service: DriveService,
        gdrive_folder: DriveObject,
        subdir="test_find_file_by_search_terms",
        search_terms=("hello", "world", "who"),
        filenames=("hello____world-who.txt", "hello____world----who.txt"),
):
    """Check find_file_by_search_terms with one, zero and two candidate files.

    With the default arguments both filenames contain every search term, so
    the lookup is expected to succeed while only the first file exists and to
    raise RuntimeError once the second one has been uploaded as well.
    """
    workdir = mkdir(gdrive_service, gdrive_folder.id, subdir)

    # Upload the first file into the freshly created test folder.
    first_upload = put_file(gdrive_service, workdir.id, filenames[0])
    with first_upload as handle:
        handle.write("this is random text 1")

    # Only one file is present, so the lookup must hand back that upload.
    match = find_file_by_search_terms(gdrive_service, workdir.id, search_terms)
    assert match.id == first_upload.id

    # Adding a term that the uploaded filename does not contain must make the
    # lookup fail.
    with pytest.raises(RuntimeError):
        find_file_by_search_terms(gdrive_service, workdir.id,
                                  search_terms + ("chicken", ))

    # Upload the second file.
    second_upload = put_file(gdrive_service, workdir.id, filenames[1])
    with second_upload as handle:
        handle.write("this is random text 2")

    # Two files are now candidates for the same terms, which the lookup is
    # expected to reject.
    with pytest.raises(RuntimeError):
        find_file_by_search_terms(gdrive_service, workdir.id, search_terms)
Beispiel #2
0
 def compile_accessions_from_list(self, sample_barcodes: List[str],
                                  local_processing: bool):
     """Compile accession info for each barcode in *sample_barcodes*.

     For every barcode the accession folder is searched for the most
     recently modified file matching the barcode and ".csv"; when that
     lookup raises NoMatchesError the same search is retried with ".xlsx"
     (the legacy format).  Barcodes with no match in either format are
     logged and skipped.  Each file found is passed, together with
     *local_processing*, to ``compile_accession_info_from_file``.
     """

     def newest_layout(barcode, extension):
         # One Drive lookup shared by the current and the legacy format.
         return drive.find_file_by_search_terms(
             self.drive_service,
             self.processing_resources.accession_folder_id,
             [barcode, extension],
             drive.FindMode.MOST_RECENTLY_MODIFIED,
         )

     for sample_barcode in sample_barcodes:
         try:
             layout_entry = newest_layout(sample_barcode, ".csv")
         except NoMatchesError:
             # No CSV for this barcode: fall back to the legacy format.
             try:
                 layout_entry = newest_layout(sample_barcode, ".xlsx")
             except NoMatchesError:
                 logger.error(
                     f"Could not find plate layout file for barcode: {sample_barcode}"
                 )
                 continue
         self.compile_accession_info_from_file(layout_entry, local_processing)
Beispiel #3
0
def get_accession_data(
    service: DriveService,
    folder_id: str,
    sample_metadata_form: CollectiveForm,
    sample_barcode: str,
) -> AccessionData:
    """Load the accession data for the plate identified by *sample_barcode*.

    The plate map file is located in two steps:

    1. The rows of *sample_metadata_form* whose sample plate barcode equals
       *sample_barcode* are reduced with ``clean_single_row`` and the plate
       map link stored in the result is resolved through
       ``get_layout_file_from_url``.
    2. If step 1 produced no file (no metadata, or the link raised
       ``HttpError``), the folder *folder_id* is searched for a file
       matching *sample_barcode*.

    The plate map type is then derived from the file name and the file is
    opened and parsed with ``read_accession_data``.

    Args:
        service: Drive service used for both lookups.
        folder_id: Folder searched when the metadata form yields no file.
        sample_metadata_form: Metadata form, indexed by ``SampleMetadata``
            column names.
        sample_barcode: Barcode of the sample plate to look up.

    Returns:
        The value produced by ``read_accession_data`` for the plate map file.

    Raises:
        BadDriveURL: If resolving the plate map link raises ``KeyError``; the
            ``KeyError`` is attached as the explicit cause.

    Errors raised by the fallback ``find_file_by_search_terms`` call are not
    handled here and propagate to the caller.
    """
    metadata_row = sample_metadata_form[sample_metadata_form[
        SampleMetadata.SAMPLE_PLATE_BARCODE] == sample_barcode]

    plate_map_file = None
    try:
        metadata_row = clean_single_row(metadata_row,
                                        SampleMetadata.SAMPLE_PLATE_BARCODE,
                                        sample_barcode, -1)
    except MetadataNotFoundError:
        # Not fatal: the folder search below is still attempted.
        logger.exception(f"No metadata found for {sample_barcode}")
    else:
        try:
            plate_map_file = get_layout_file_from_url(
                service, metadata_row[SampleMetadata.SAMPLE_PLATE_MAP])
        except HttpError:
            # Not fatal either: fall through to the folder search.
            logger.exception(f"Dead link in {SampleMetadata.SHEET_NAME}")
        except KeyError as err:
            # Chain explicitly so the traceback presents the KeyError as the
            # direct cause rather than as an error raised during handling.
            raise BadDriveURL(
                f"Bad link in {SampleMetadata.SHEET_NAME} for {sample_barcode}"
            ) from err

    if plate_map_file is None:
        logger.error(f"No results found in {SampleMetadata.SHEET_NAME}, "
                     f"searching plate layout folder for {sample_barcode}")
        plate_map_file = find_file_by_search_terms(service, folder_id,
                                                   [sample_barcode])

    plate_map_type = get_plate_map_type_from_name(plate_map_file.name)

    with plate_map_file.open() as fh:
        accession_data = read_accession_data(plate_map_type, fh)

    return accession_data
def test_find_file_by_search_terms_exclude_contents(
        gdrive_service: DriveService,
        gdrive_folder: DriveObject,
        subdir="test_find_file_by_search_terms_exclude_contents",
        search_terms=("hello", "world", "who"),
        filenames=("keywords_not_in_filename.txt",
                   "hello____world----who.txt"),
):
    """Check that a file holding the search terms only in its body is ignored.

    With the default arguments the first file carries the terms in its
    contents but not in its name, and the second carries them in its name but
    not in its contents; the lookup is expected to return the second file.
    """
    workdir = mkdir(gdrive_service, gdrive_folder.id, subdir)

    # Decoy: the search terms appear in the file body.
    decoy_upload = put_file(gdrive_service, workdir.id, filenames[0])
    with decoy_upload as handle:
        handle.write(" ".join(search_terms))

    # Target: unrelated body text.
    target_upload = put_file(gdrive_service, workdir.id, filenames[1])
    with target_upload as handle:
        handle.write("this is random text 2")

    # The lookup must resolve to the target upload, not the decoy.
    match = find_file_by_search_terms(gdrive_service, workdir.id, search_terms)
    assert match.id == target_upload.id
Beispiel #5
0
    def populate_models(self):
        """Add CZB-ID/working-plate links and thaw records to the session.

        For every WorkingPlate row, the most recently modified file in
        ``self.folder_id`` matching the plate barcode and ".csv" is read and
        its non-null (CZB_ID, Destination_Well) pairs are processed.  Pairs
        that are already linked to the plate, or whose CZB ID is flagged by
        ``check_control``, are skipped, as are CZB IDs without a CZBID row
        (these are logged).  Every remaining pair gets a CZBIDWorkingPlate
        row.

        Thaws are recorded as CZBIDThaw rows with ``volume_removed=20``:
        directly for CZB IDs that have no CZBIDOgPlate row, and otherwise,
        after all working plates have been processed, once for every
        CZBIDOgPlate row on each OG plate that was touched.

        Objects are only added to ``self.session``; no commit happens here.
        """
        # Look up each working plate's layout file and take the CZB IDs from it.
        plates = self.session.query(WorkingPlate).all()
        thawed_og_plates = set()

        # "<czb_id>-<working_plate_id>" keys of the links that already exist,
        # used below to avoid adding them a second time.
        known_links = set()
        for result in self.session.query(CZBIDWorkingPlate).all():
            known_links.add(f"{result.czb_id}-{result.working_plate_id}")

        for working_plate in plates:
            layout_file = find_file_by_search_terms(
                service=self.drive_service,
                folder_id=self.folder_id,
                search_terms=[working_plate.barcode, ".csv"],
                find_mode=FindMode.MOST_RECENTLY_MODIFIED,
            )
            with layout_file.open() as fh:
                # Pick the pandas reader from the file extension.
                reader = (
                    pd.read_excel
                    if layout_file.name.endswith(".xlsx")
                    else pd.read_csv
                )
                layout = reader(fh)
                id_and_well = layout[["CZB_ID", "Destination_Well"]]
                for czb_id, well in id_and_well.dropna().values:
                    link_key = f"{czb_id}-{working_plate.id}"
                    if link_key in known_links or check_control(czb_id):
                        continue

                    czb_record = (
                        self.session.query(CZBID)
                        .filter(CZBID.czb_id == czb_id)
                        .one_or_none()
                    )
                    if not czb_record:
                        log.error(f"Did not find entry for {czb_id}")
                        continue

                    # An OG-plate association decides how the thaw is recorded.
                    og_link = (
                        self.session.query(CZBIDOgPlate)
                        .filter(CZBIDOgPlate.czb_id_id == czb_record.id)
                        .one_or_none()
                    )
                    if og_link:
                        # Thaws for the whole OG plate are added after the loop.
                        thawed_og_plates.add(og_link.og_plate)
                    else:
                        # No OG plate: treated as an internal sample, so its
                        # first thaw is recorded right away.
                        first_thaw = CZBIDThaw(
                            czb_id_id=czb_record.id, volume_removed=20
                        )
                        self.session.add(first_thaw)

                    plate_link = CZBIDWorkingPlate(
                        czb_id=czb_record,
                        working_plate_id=working_plate.id,
                        well_id=well,
                    )
                    self.session.add(plate_link)

        # Every CZB ID on an OG plate that was taken out gets a thaw record.
        for og_plate in thawed_og_plates:
            og_plate_links = (
                self.session.query(CZBIDOgPlate)
                .filter(CZBIDOgPlate.og_plate == og_plate)
                .all()
            )
            for og_plate_link in og_plate_links:
                plate_thaw = CZBIDThaw(
                    czb_id_id=og_plate_link.czb_id_id, volume_removed=20
                )
                self.session.add(plate_thaw)