Esempio n. 1
0
def load(args):
    """Run the load stage selected by ``args.state`` and then verify the result.

    A DSS query is built from ``args.project_uuids`` and ``args.bundle_fqids``.
    If either of them is truthy the run is treated as an update; otherwise the
    reload finalizer / non-update mode is used.

    Stages, chosen by ``args.state``:

    * ``0`` or ``1`` -- ``etl.etl_dss_bundles`` is run for the query, staging
      into ``/mnt``.
    * ``2`` -- ``etl.upload_and_load`` is run on the staging directory.
    * ``3`` -- ``etl.load_tables`` is run for ``args.s3_upload_id``.

    Any other ``state`` value skips the stage step entirely.  In every case
    ``_verify_load`` is called with the DSS query afterwards.

    Args:
        args: Parsed arguments object exposing ``project_uuids``,
            ``bundle_fqids``, ``state`` and (for state 3) ``s3_upload_id``.

    Raises:
        KeyError: For state 0/1 when the ``DEPLOYMENT_STAGE`` environment
            variable is not set.
    """
    dss_query = _build_dss_query(project_uuids=args.project_uuids, bundle_fqids=args.bundle_fqids)
    staging_dir = os.path.abspath('/mnt')
    is_update = bool(args.project_uuids or args.bundle_fqids)

    if args.state in (0, 1):
        content_type_patterns = ['application/json; dcp-type="metadata*"']  # match metadata
        filename_patterns = ["*zarr*",  # match expression data
                             "*.results",  # match SS2 raw count files
                             "*.mtx", "genes.tsv", "barcodes.tsv", "empty_drops_result.csv"]  # match 10X raw count files

        # Use half of the available CPUs for each pool, but never fewer than
        # one: on a single-CPU host the halved count would otherwise be 0,
        # which is not a valid executor size.
        half_cpus = max(1, multiprocessing.cpu_count() // 2)

        etl.etl_dss_bundles(query=dss_query,
                            content_type_patterns=content_type_patterns,
                            filename_patterns=filename_patterns,
                            transformer_cb=etl.transform_bundle,
                            finalizer_cb=etl.finalizer_update if is_update else etl.finalizer_reload,
                            staging_directory=staging_dir,
                            deployment_stage=os.environ['DEPLOYMENT_STAGE'],
                            max_workers=half_cpus,
                            max_dispatchers=half_cpus,
                            dispatcher_executor_class=concurrent.futures.ProcessPoolExecutor)
    elif args.state == 2:
        etl.upload_and_load(staging_dir, is_update=is_update)
    elif args.state == 3:
        etl.load_tables(args.s3_upload_id, is_update=is_update)

    _verify_load(es_query=dss_query)
Esempio n. 2
0
    def test_load_tables_update(self, mock_transaction, mock_create_tables,
                                mock_error):
        """Check ``load_tables`` in update mode, first succeeding then failing.

        The three ``mock_*`` arguments are injected from outside this method
        (presumably by ``mock.patch`` decorators -- confirm against the
        enclosing class).  ``mock_create_tables`` is accepted but not
        inspected here.
        """
        upload_id = str(uuid.uuid4())

        # First run: exactly one transaction call, with any single argument.
        load_tables(upload_id, is_update=True)
        mock_transaction.assert_called_once_with(mock.ANY)

        # Second run: make the transaction mock raise a database error and
        # check that the error mock was invoked.
        mock_transaction.side_effect = psycopg2.Error()
        load_tables(upload_id, is_update=True)
        self.assertTrue(mock_error.called)