def main(config, access_token, profile):
    """Re-upload metadata XML files to storage and sync their SONG records.

    Reads the YAML ``config`` into a settings dict, then walks the
    tab-separated file found at ``app_ctx[profile]``. Each non-blank line must
    carry five columns: project code, object id, analysis id, file name and
    the md5 recorded in SONG. For every line the function:

    1. skips (and logs) projects not listed in ``aws_approved`` when the
       profile is ``'aws'``;
    2. downloads the XML with ``score-client`` unless it is already present
       in ``xml_dir``;
    3. for analyses whose id starts with ``EGA`` and whose file name starts
       with ``bundle``, uses the XML under ``ega_xml_dir`` instead,
       generating it with ``generate_metadata_xml`` when missing;
    4. records any md5 disagreement in the ``xml_mismatch`` file;
    5. unless a ``<fileName>.fix`` marker already exists in ``xml_fix_dir``,
       uploads the file, copies it to the meta bucket, updates the SONG file
       record when the md5 differs, publishes the analysis if it is not
       already ``PUBLISHED``, and writes the marker.

    Args:
        config: YAML source handed directly to ``yaml.load``.
        access_token: Token passed to ``ApiConfig`` for the SONG API.
        profile: Key selecting the input list in the config; also used as the
            ``score-client`` profile and the key into ``app_ctx['song']``.
            ``'aws'`` and ``'collab'`` receive special handling.

    Raises:
        subprocess.CalledProcessError: If a ``score-client`` or ``aws``
            command exits with a non-zero status.
        ValueError: If a non-blank input line does not have exactly five
            tab-separated columns.
    """
    # NOTE(review): yaml.load() without an explicit Loader can construct
    # arbitrary Python objects and is rejected by PyYAML >= 6, where Loader is
    # a required argument. If the config is plain YAML, yaml.safe_load(config)
    # is the usual replacement -- left unchanged pending confirmation.
    app_ctx = yaml.load(config)

    def append_log(message):
        # Append one message to the run log; the file is reopened per write so
        # nothing is lost if a later step raises.
        with open(app_ctx['log'], 'a') as log_file:
            log_file.write(message)

    # Index the EGA job lines by their first column (the analysis id).
    ega_xml = {}
    with open(app_ctx['ega_xml_info'], 'r', newline='') as info_file:
        for info_line in info_file:
            ega_xml[info_line.split('\t')[0]] = info_line.rstrip()

    # Start every run with fresh report files.
    for report_key in ('log', 'xml_mismatch'):
        if os.path.isfile(app_ctx[report_key]):
            os.remove(app_ctx[report_key])
    if not os.path.isdir(app_ctx['xml_fix_dir']):
        os.mkdir(app_ctx['xml_fix_dir'])

    with open(app_ctx[profile], 'r', newline='') as song_file:
        for song_line in song_file:
            # A blank (e.g. trailing) line would otherwise break the unpack.
            if not song_line.strip():
                continue
            (projectCode, objectId, analysisId, fileName,
             songMd5) = song_line.rstrip().split('\t')

            # double check for safety reason
            if (profile == 'aws'
                    and projectCode not in app_ctx['aws_approved']):
                append_log(
                    '{0}::{1} object {2}: is not allowed in AWS\n'.format(
                        projectCode, analysisId, objectId))
                continue

            # only download if there is no local copy
            score_path = os.path.join(app_ctx['xml_dir'], fileName)
            if not os.path.isfile(score_path):
                subprocess.check_output([
                    'score-client', '--profile', profile, 'download',
                    '--object-id', objectId, '--validate', 'false', '--force',
                    '--output-dir', app_ctx['xml_dir']
                ])

            # md5 of the copy currently held in storage
            scoreMd5 = get_md5(score_path)

            # By default the downloaded copy is what gets uploaded again;
            # EGA bundles are replaced by the XML built from the EGA jobs.
            upload_path = score_path
            if analysisId.startswith('EGA') and fileName.startswith('bundle'):
                if not ega_xml.get(analysisId):
                    click.echo(
                        '{}::{}: the ega transfer job is missing'.format(
                            projectCode, analysisId))
                    append_log(
                        '{0}::{1}: the ega transfer job is missing in the completed folder\n'
                        .format(projectCode, analysisId))
                    continue

                if not os.path.isdir(app_ctx['ega_xml_dir']):
                    os.makedirs(app_ctx['ega_xml_dir'])

                upload_path = os.path.join(app_ctx['ega_xml_dir'], fileName)
                if not os.path.isfile(upload_path):
                    generate_metadata_xml(ega_xml[analysisId], app_ctx)

            # md5 and size of the file that will be uploaded
            fileMd5 = get_md5(upload_path)
            fileSize = os.path.getsize(upload_path)

            if fileMd5 != songMd5 or fileMd5 != scoreMd5:
                with open(app_ctx['xml_mismatch'], 'a') as mismatch_file:
                    mismatch_file.write('\t'.join(
                        [projectCode, analysisId, scoreMd5, songMd5, fileMd5]))
                    mismatch_file.write('\n')

            # skip the ones already fixed by a previous run
            fixpath = os.path.join(app_ctx['xml_fix_dir'], fileName + '.fix')
            if os.path.isfile(fixpath):
                continue

            # upload to storage
            subprocess.check_output([
                'score-client', '--profile', profile, 'upload', '--md5',
                fileMd5, '--file', upload_path, '--object-id', objectId,
                '--force'
            ])

            # copy xml to open meta bucket
            if profile == 'collab':
                subprocess.check_output([
                    'aws', '--endpoint-url',
                    app_ctx[profile + '_endpoint_url'], '--profile', profile,
                    's3', 'cp', upload_path,
                    app_ctx['meta_bucket_url'] + objectId
                ])
            else:
                subprocess.check_output([
                    'aws', '--profile', 'amazon_pay', 's3', 'cp', upload_path,
                    app_ctx['meta_bucket_url'] + objectId
                ])

            # update the song
            server_url = app_ctx['song'][profile]
            api = Api(ApiConfig(server_url, projectCode, access_token))

            # the song file record only needs updating when the md5 changed
            if fileMd5 != songMd5:
                fileUpdate = FileUpdateRequest()
                fileUpdate.fileSize = fileSize
                fileUpdate.fileMd5sum = fileMd5
                api.update_file(objectId, fileUpdate)

            # publish the analysis
            analysis_state = api.get_analysis(
                analysisId).__dict__['analysisState']
            if analysis_state != "PUBLISHED":
                try:
                    api.publish(analysisId)
                except Exception:
                    # Best effort: record the failure and carry on with the
                    # next entry. Narrowed from a bare ``except`` so Ctrl-C
                    # and SystemExit still stop the run.
                    append_log('{0}::{1}: can not be published\n'.format(
                        projectCode, analysisId))

            # NOTE(review): the marker is written even when publishing
            # failed, so the entry is skipped on later runs -- confirm this
            # is intended.
            with open(fixpath, 'w') as marker:
                marker.write('')
# Example #2
# 0
def _manifest_to_json(manifest_path):
    """Parse a tab-separated manifest file into a JSON-serialisable dict.

    The first field of the first line becomes ``analysis_id``. Every
    following non-blank line contributes one entry to ``files`` built from
    its first three fields (``object_id``, ``file_name``, ``md5``).
    """
    with open(manifest_path, 'r') as manifest:
        header = manifest.readline().rstrip('\n')
        manifest_json = {'analysis_id': header.split('\t')[0], 'files': []}
        for line in manifest:
            if not line.strip():
                continue
            fields = line.rstrip('\n').split('\t')
            manifest_json['files'].append({
                'object_id': fields[0],
                'file_name': fields[1],
                'md5': fields[2],
            })
    return manifest_json


def main():
    """Upload a SONG payload and its files, then publish the analysis.

    Command-line arguments supply the study id, server URL, payload JSON
    path, output manifest file name, input directory, an optional access
    token (defaulting to the ``ACCESSTOKEN`` environment variable) and an
    optional JSON output file name.

    Steps:

    1. Look up the payload's ``analysisId`` on the server. If the lookup
       raises, the payload is uploaded, its status updated and it is saved.
       If the analysis exists, each of its server-side files is checked with
       ``exists_in_file_array`` against the payload's ``file`` list, and the
       process exits with status 1 on the first mismatch.
    2. Create the manifest, upload the files with ``icgc-storage-client``
       and publish the analysis.
    3. When ``--json`` is given, write a JSON summary of the manifest into
       the input directory.

    Raises:
        SystemExit: With status 1 when a server-side file does not match the
            payload.
        subprocess.CalledProcessError: If ``icgc-storage-client`` exits with
            a non-zero status.
    """
    parser = argparse.ArgumentParser(
        description='Generate a song payload using minimal arguments')
    parser.add_argument('-s',
                        '--study-id',
                        dest="study_id",
                        help="Study ID",
                        required=True)
    parser.add_argument('-u',
                        '--server-url',
                        dest="server_url",
                        help="Server URL",
                        required=True)
    parser.add_argument('-p',
                        '--payload',
                        dest="payload",
                        help="JSON Payload",
                        required=True)
    parser.add_argument('-o',
                        '--output',
                        dest="output",
                        help="Output manifest file",
                        required=True)
    parser.add_argument('-d',
                        '--input-dir',
                        dest="input_dir",
                        help="Payload files directory",
                        required=True)
    parser.add_argument('-t',
                        '--access-token',
                        dest="access_token",
                        default=os.environ.get('ACCESSTOKEN', None),
                        help="Access token (defaults to the ACCESSTOKEN "
                        "environment variable)")
    parser.add_argument('-j', '--json', dest="json_output")
    results = parser.parse_args()

    study_id = results.study_id
    server_url = results.server_url
    access_token = results.access_token
    payload = results.payload

    # Load the payload once and close the file deterministically.
    with open(payload) as payload_file:
        payload_json = json.load(payload_file)
    analysis_id = payload_json.get('analysisId')

    config = ApiConfig(server_url, study_id, access_token, debug=True)
    api = Api(config)

    client = FileUploadClient(api,
                              payload,
                              is_async_validation=True,
                              ignore_analysis_id_collisions=True)

    analysis = None
    try:
        analysis = api.get_analysis(analysis_id)

        # Compare by value: identity checks against a string literal are
        # implementation-dependent.
        if analysis.analysisId == '':
            raise Exception('Analysis id ' + analysis_id + ' not found')

        payload_files = payload_json.get('file')
        for api_file in api.get_analysis_files(analysis_id):
            if not exists_in_file_array(file_name=api_file.fileName,
                                        file_md5=api_file.fileMd5sum,
                                        file_type=api_file.fileType,
                                        file_size=api_file.fileSize,
                                        file_access=api_file.fileAccess,
                                        files_array=payload_files):
                print(
                    "Files in  payload do not match the files on song server.")
                # SystemExit is not an Exception subclass, so it is not
                # swallowed by the handler below.
                raise SystemExit(1)
    except Exception:
        # Only a failed lookup (analysis never assigned) triggers an upload.
        # NOTE(review): any other error raised above, including the
        # "not found" exception, is silently ignored here -- confirm this is
        # intended.
        if analysis is None or analysis == '':
            client.upload()
            client.update_status()
            client.save()

    manifest_filename = results.output
    create_manifest(api, analysis_id, manifest_filename, results.input_dir)

    manifest_path = os.path.join(results.input_dir, manifest_filename)
    subprocess.check_output([
        'icgc-storage-client', 'upload', '--manifest', manifest_path,
        '--force'
    ])

    api.publish(analysis_id)

    if results.json_output:
        manifest_json = _manifest_to_json(manifest_path)
        json_path = os.path.join(results.input_dir, results.json_output)
        with open(json_path, 'w') as outfile:
            json.dump(manifest_json, outfile)