def _transform_unidify(
    self,
    results_dir: Path,
    twitter_api_settings: TwitterApiSettings,
) -> Counter[_ExecuteResult]:
    """Hydrate stored Tweet-IDs into full Tweets and write them per entry.

    Looks up every Tweet-ID yielded for the batch entries in
    ``results_dir``, writes the retrieved Tweets to each entry's data
    file (LZMA-compressed JSON lines), rewrites each entry's meta file,
    and tallies one SUCCESS per completed entry.

    :param results_dir: Directory holding the batch entries' data/meta files.
    :param twitter_api_settings: Settings passed through to statuses_lookup().
    :returns: Counter of ``_ExecuteResult`` outcomes. Non-success outcomes
        are presumably recorded by ``_iter_entries_tweet_ids`` via the
        counter it receives — verify there.
    """
    result_counter = Counter[_ExecuteResult]()
    # spy() peeks at the first item without consuming the iterator, so we
    # can bail out on empty input before unzip() would fail on it.
    head, entries_tweet_ids = spy(
        self._iter_entries_tweet_ids(results_dir, result_counter)
    )
    if not head:  # Check if any entries with Tweet-IDs exist (else unzip fails).
        return result_counter

    # Split the (entry, tweet_id) pairs into two lock-stepped iterators.
    entries, tweet_ids = cast(
        Tuple[Iterator[BatchEntry], Iterator[TweetId]], unzip(entries_tweet_ids)
    )

    # Zipping `entries` back with the looked-up Tweets assumes
    # statuses_lookup() yields exactly one result per input ID, in input
    # order — TODO confirm. groupby_transform merges only *adjacent* pairs,
    # so each entry's IDs must be yielded contiguously (the iterator helper
    # appears to guarantee this — confirm in _iter_entries_tweet_ids).
    for entry, tweets in groupby_transform(
        zip(entries, statuses_lookup(tweet_ids, twitter_api_settings)),
        keyfunc=itemgetter(0),
        valuefunc=itemgetter(1),
    ):
        write_jsonl_lines(
            results_dir / entry.data_file_name,
            # Lookup results can be None (presumably failed lookups); skip those.
            (tweet for tweet in tweets if tweet is not None),
            use_lzma=True,
        )
        write_json(
            results_dir / entry.meta_file_name, entry, overwrite_existing=True
        )
        result_counter[_ExecuteResult.SUCCESS] += 1

    return result_counter
def _grouped_by_user(potential_targets):
    """Yield ``(user_id, pairs)`` for each run of rows sharing a user.

    ``pairs`` is a tuple of ``(child_id, study_id)`` tuples. Relies on the
    caller supplying rows ordered by user id (hence the ORDER BY in the
    SQL), since groupby_transform only merges adjacent rows.

    See:
        https://more-itertools.readthedocs.io/en/stable/api.html#more_itertools.groupby_transform
    """
    grouped = groupby_transform(
        potential_targets,
        keyfunc=attrgetter("user_id"),
        valuefunc=attrgetter("child_id", "study_id"),
    )
    for user_id, child_study_pairs in grouped:
        # valuefunc output arrives as a lazy map object; materialize it.
        yield user_id, tuple(child_study_pairs)
def combine_times(movie_names, movie_times):
    """Combine times for duplicate movienames

    Note: only *adjacent* duplicates are merged (itertools.groupby
    semantics); sort the inputs by name first if duplicates may be
    non-adjacent.

    :movie_names: [str]
    :movie_times: [[str], [str]]
    :returns: (list of movie names, list of lists of movie times)
    """
    # Stdlib equivalent of more_itertools.groupby_transform for this use.
    from itertools import groupby

    # Fix: the old assert message referenced the undefined name
    # `movie_name` (NameError on failure) and formatted the raw lists;
    # compare and report the lengths instead.
    assert len(movie_names) == len(movie_times), '{} != {}'.format(
        len(movie_names), len(movie_times))
    if not movie_names:
        return [], []
    movie_names, movie_times = zip(
        *[(name, list(chain.from_iterable(times for _, times in group)))
          for name, group in groupby(zip(movie_names, movie_times),
                                     key=itemgetter(0))])
    return list(movie_names), list(movie_times)
def combine_times(movie_names, movie_times):
    """Combine times for duplicate movienames

    :movie_names: [str]
    :movie_times: [[str], [str]]
    :returns: (list of movie names, list of lists of movie times)
    """
    assert len(movie_names) == len(
        movie_times), f'{len(movie_names)} != {len(movie_times)}'
    if not movie_names:
        return [], []
    # Sorting brings duplicate names next to each other so that
    # groupby_transform can merge them in a single pass.
    name_time_pairs = sorted(zip(movie_names, movie_times))
    grouped = groupby_transform(
        name_time_pairs,
        itemgetter(0),  # group by name
        itemgetter(1))  # output grouped times
    combined_names = []
    combined_times = []
    for name, time_lists in grouped:
        combined_names.append(name)
        combined_times.append(list(chain.from_iterable(time_lists)))
    return combined_names, combined_times
def update_dart_fields(config: Config, samples: List[SampleDoc]) -> bool:
    """Updates DART plates and wells following updates to the filtered positive fields

    Arguments:
        config {Config} -- application config specifying database details
        samples {List[Dict[str, str]]} -- the list of samples to update in DART

    Returns:
        bool -- whether the updates completed successfully

    Raises:
        ValueError -- if the DART SQL Server connection cannot be established
    """
    sql_server_connection = create_dart_sql_server_conn(config)
    if sql_server_connection is None:
        raise ValueError("Unable to establish DART SQL Server connection")

    dart_updated_successfully = True
    labclass_by_centre_name = biomek_labclass_by_centre_name(config.CENTRES)
    try:
        logger.info("Writing to DART")

        cursor = sql_server_connection.cursor()

        # NOTE(review): groupby_transform only groups *adjacent* samples, so
        # `samples` is assumed to arrive sorted by plate barcode — confirm at
        # the call site.
        for plate_barcode, samples_in_plate in groupby_transform(
            samples,
            lambda x: x[FIELD_PLATE_BARCODE],
            reducefunc=list,  # was `lambda x: list(x)` — `list` is equivalent
        ):
            try:
                labware_class = labclass_by_centre_name[
                    samples_in_plate[0][FIELD_SOURCE]]
                plate_state = add_dart_plate_if_doesnt_exist(
                    cursor, plate_barcode, labware_class  # type:ignore
                )
                if plate_state == DART_STATE_PENDING:
                    for sample in samples_in_plate:
                        # Only positive samples get well properties written.
                        if sample[FIELD_RESULT] == POSITIVE_RESULT_VALUE:
                            well_index = get_dart_well_index(
                                sample.get(FIELD_COORDINATE, None))
                            if well_index is not None:
                                well_props = map_mongo_doc_to_dart_well_props(
                                    sample)
                                set_dart_well_properties(
                                    cursor, plate_barcode, well_props, well_index  # type:ignore
                                )
                            else:
                                raise ValueError(
                                    "Unable to determine DART well index for sample "
                                    f"{sample[FIELD_ROOT_SAMPLE_ID]} in plate {plate_barcode}"
                                )
                # Commit per plate so one failed plate doesn't undo the others.
                cursor.commit()
                # (Removed the no-op `dart_updated_successfully &= True`.)
            except Exception as e:
                logger.error(
                    f"Failed updating DART for samples in plate {plate_barcode}"
                )
                logger.exception(e)
                # Roll back this plate's partial writes and carry on.
                cursor.rollback()
                dart_updated_successfully = False

        logger.info("Updating DART completed")
    except Exception as e:
        logger.error("Failed updating DART")
        logger.exception(e)
        dart_updated_successfully = False
    finally:
        sql_server_connection.close()

    return dart_updated_successfully