Example #1
# Python 2 snippet (it relies on dict.itervalues()); imports added for completeness.
# gcs_wrapper is a project-local module, not shown here, that wraps Google Cloud Storage uploads.
import json
import os
from StringIO import StringIO

def upload_etl_file(config,
                    key_name,
                    barcode2field2value,
                    log,
                    type_bio,
                    remove_keys=[]):
    log.info('\tstart upload_etl_file(%s)' % (key_name))
    output_file = StringIO()

    for field2value in barcode2field2value.itervalues():
        for remove_key in remove_keys:
            if remove_key[0] in field2value and field2value[
                    remove_key[0]] in remove_key[1]:
                log.warning(
                    "\t\tWARNING: %s samples should be excluded. Skipped sample: %s"
                    % (remove_key[0], field2value['SampleBarcode']))
                # a remove_key matched: stop checking and skip this sample
                break
        else:
            # no remove_key matched, so the sample is written out
            output_file.write(json.dumps(field2value) + "\n")
    tmp_dir_parent = os.environ.get('ISB_TMP', '/tmp/')
    path = os.path.join(tmp_dir_parent, type_bio + '/')
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = path + type_bio + '.json'
    with open(file_path, 'w') as bio_file:
        bio_file.write(output_file.getvalue())
        output_file.close()
    bucket_name = config['buckets']['open']
    if config['upload_etl_files']:
        gcs_wrapper.upload_file(file_path, bucket_name, key_name, log)
        log.info('\tuploaded etl file')
    else:
        log.info('\tnot uploading etl file')
    log.info('\tfinish upload_etl_file')
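As a quick orientation, here is one hypothetical way to call the function, with the input shapes inferred from the body above: barcode2field2value maps a sample barcode to a field/value dict (which must contain 'SampleBarcode'), and each remove_keys entry pairs a field name with the values that mark a sample for exclusion. All names and values below are illustrative assumptions, not taken from the project.

import logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('etl')

# Hypothetical inputs; the shapes are inferred from how upload_etl_file uses them.
config = {'buckets': {'open': 'my-open-bucket'}, 'upload_etl_files': False}
barcode2field2value = {
    'TCGA-XX-0001-01': {'SampleBarcode': 'TCGA-XX-0001-01', 'sample_type': '01'},
}
# each remove_keys entry is (field_name, collection_of_values_to_exclude)
remove_keys = [('sample_type', set(['10', '11']))]

upload_etl_file(config, 'etl/biospecimen/biospecimen.json', barcode2field2value,
                log, 'biospecimen', remove_keys)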
Example #4
from datetime import date  # needed by date.today() below

def upload_latestarchive_file(config, archive_file_path, log):
    bucket_name = config['buckets']['open']
    key_name = '/%s/%s' % (config['latestarchive_folder'],
                           str(date.today()).replace('-', '_') + '_latestarchive.txt')
    if config['upload_files'] and config['upload_open']:
        log.info('\tuploading %s to %s' % (archive_file_path, key_name))
        gcs_wrapper.upload_file(archive_file_path, bucket_name, key_name, log)
    else:
        log.info('\tnot uploading %s to %s' % (archive_file_path, key_name))
Example #5
def upload_files(config, archive_path, file2metadata, log):
    # TODO: for the DatafileNameKey, use the value already in the metadata
    files = os.listdir(archive_path)
    if files:
        bucket_name, key_prefix = get_bucket_key_prefix(config, file2metadata[files[0]])
        for file_name in files:
            metadata = file2metadata[file_name]
            key_name = key_prefix + metadata['DataLevel'].replace(' ', '_') + '/' + file_name
            metadata['DatafileNameKey'] = key_name
            if config['upload_files']:
                gcs_wrapper.upload_file(archive_path + file_name, bucket_name, key_name, log)
    else:
        log.warning('\tno files for %s' % (archive_path))
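A note on the inputs this helper appears to expect, inferred from the body (get_bucket_key_prefix is a project helper that is not shown here): archive_path should end with a path separator, since it is concatenated directly with each file name, and every metadata dict needs at least a 'DataLevel' entry.

# Hypothetical input shapes; the values are placeholders, not project data.
archive_path = '/tmp/archives/example_archive/'    # trailing '/' is required by archive_path + file_name
file2metadata = {
    'sample_data.txt': {'DataLevel': 'Level 3'},    # 'DataLevel' determines the key layout above
}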
Example #6
def upload_sdrf_file(config, archive_path, file_name, metadata, log):
    center2platform = config['upload_archives']['mage-tab']
    try:
        if metadata['DataCenterName'] not in center2platform or metadata['Platform'] not in center2platform[metadata['DataCenterName']]:
            log.info('\t\tskipping uploading %s from sdrf archive' % (file_name))
            return
    except Exception:
        log.exception('problem checking whether to upload SDRF file')
        raise
    bucket_name = config['buckets']['open']
    key_name = getSDRFKeyName(file_name, metadata, log)
    if config['upload_files'] and config['upload_open']:
        gcs_wrapper.upload_file(archive_path + file_name, bucket_name, key_name, log)
    else:
        log.info('\t\tnot uploading %s from sdrf archive to %s' % (file_name, key_name))
Example #7
def upload_file(config, file_path, key_name, log):
    bucket_name = config['buckets']['open']
    if config['upload_files']:
        log.info('\tuploading %s' % (key_name))
        gcs_wrapper.upload_file(file_path, bucket_name, key_name, log)
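Reading across the helpers above, they all consume the same config dict. The sketch below consolidates the keys they actually read; every value is a placeholder, and project helpers such as get_bucket_key_prefix and getSDRFKeyName may read further keys that are not visible in these snippets.

# Consolidated, hypothetical view of the config keys read by the helpers above.
config = {
    'buckets': {'open': 'my-open-bucket'},        # target bucket for every upload
    'upload_files': True,                         # gate for archive, SDRF and latestarchive uploads
    'upload_open': True,                          # additional gate for open-access uploads
    'upload_etl_files': True,                     # gate for the ETL JSON upload
    'latestarchive_folder': 'latestarchive',      # key prefix for the dated latestarchive.txt
    'upload_archives': {
        'mage-tab': {                             # DataCenterName -> Platforms whose SDRF files get uploaded
            'example.center.org': ['Example_Platform'],
        },
    },
}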