Python generateの例、gen3_etl.utils.schema.generate Pythonの例

コード例 #1

0

ファイルを表示

    print(p)

    if args.schema:

        def my_callback(schema):
            """Set the schema category and add the ``cases`` link property.

            Mutates ``schema`` in place and returns the same object.
            """
            schema['category'] = 'hop extention'
            schema['properties']['cases'] = {
                '$ref': '_definitions.yaml#/to_one'
            }
            return schema

        item_paths = ['output/hop/file.json']

        link = {
            'name': 'cases',
            'backref': 'submitted_file',
            'label': 'extends',
            'target_type': 'case',
            'multiplicity': 'one_to_one',
            'required': False
        }
        schema_path = generate(item_paths,
                               'submitted_file',
                               output_dir=args.output_dir,
                               links=[link],
                               callback=my_callback)
        # Check the schema file itself: the previous check tested the data
        # file `p`, so a missing schema file went unnoticed.
        assert os.path.isfile(schema_path), \
            'should have an schema file {}'.format(schema_path)
        print(schema_path)

コード例 #2

0

ファイルを表示

    assert os.path.isfile(p), 'should have an output file {}'.format(p)
    print(p)

    if args.schema:

        def my_callback(schema):
            """Add ``source`` and ``demographic`` properties and set the category.

            Mutates ``schema`` in place and returns the same object.
            """
            schema['properties']['source'] = {'type': 'string'}
            schema['category'] = 'bcc extention'
            schema['properties']['demographic'] = {
                '$ref': '_definitions.yaml#/to_one'
            }
            return schema

        link = {
            'name': 'demographic',
            'backref': 'bcc_demographic',
            'label': 'extends',
            'target_type': 'demographic',
            'multiplicity': 'many_to_one',
            'required': False
        }
        schema_path = generate(item_paths,
                               'bcc_demographic',
                               output_dir='output/bcc',
                               links=[link],
                               callback=my_callback)
        # Check the schema file itself: the previous check tested the data
        # file `p`, so a missing schema file went unnoticed.
        assert os.path.isfile(schema_path), \
            'should have an schema file {}'.format(schema_path)
        print(schema_path)

コード例 #3

0

ファイルを表示

    if args.schema:

        def my_callback(schema):
            """Add ``source``, ``diagnosis_name`` and ``diagnosis`` properties.

            Also sets the schema category.  Mutates ``schema`` in place and
            returns the same object.
            """
            schema['properties']['source'] = {'type': 'string'}
            # diagnosis_name is allowed to be either a string or null
            schema['properties']['diagnosis_name'] = {
                'type': ['string', 'null']
            }
            schema['category'] = 'bcc extention'
            schema['properties']['diagnosis'] = {
                '$ref': '_definitions.yaml#/to_one'
            }
            return schema

        link = {
            'name': 'diagnosis',
            'backref': 'bcc_diagnosis',
            'label': 'extends',
            'target_type': 'diagnosis',
            'multiplicity': 'many_to_one',
            'required': False
        }
        schema_path = generate(item_paths,
                               'bcc_diagnosis',
                               output_dir='output/bcc',
                               links=[link],
                               callback=my_callback)
        # Check the schema file itself: the previous check tested the data
        # file `p`, so a missing schema file went unnoticed.
        assert os.path.isfile(schema_path), \
            'should have an schema file {}'.format(schema_path)
        print(schema_path)

コード例 #4

0

ファイルを表示

        ('source/bcc/voncologsurgery.json', 'Surgery', None),
        ('source/bcc/Radiotherapy.json', 'Radiotherapy', None)
    ]
    treatment_ids = transform_gen3(item_paths, output_dir=args.output_dir, project_id=args.project_id)
    # print('\n'.join(treatment_ids))

    item_paths = [
        ('source/bcc/treatment_chemotherapy_ohsu.json', 'Chemotherapy', my_callback),
        ('source/bcc/treatment_chemotherapy_manually_entered.json', 'Chemotherapy', my_callback),
    ]
    transform_chemotherapy(item_paths, treatment_ids=treatment_ids, output_dir=args.output_dir, project_id=args.project_id)
    item_paths = [
        'output/bcc/bcc_chemotherapy.json',
    ]
    link = {'name':'treatment', 'backref':'bcc_chemotherapy', 'label':'describes', 'target_type':'treatment',  'multiplicity': 'many_to_one', 'required': False }
    schema_path = generate(item_paths,'bcc_chemotherapy', output_dir='output/bcc', links=[link], callback=my_schema_callback)
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(schema_path)
    print(schema_path)

    item_paths = [
        ('source/bcc/vResectionDate.json', 'Surgery', my_callback),
        ('source/bcc/voncologsurgery.json', 'Surgery', my_callback),
    ]
    transform_surgery(item_paths, treatment_ids=treatment_ids, output_dir=args.output_dir, project_id=args.project_id)
    item_paths = [
        'output/bcc/bcc_surgery.json',
    ]
    link = {'name':'treatment', 'backref':'bcc_surgery', 'label':'describes', 'target_type':'treatment',  'multiplicity': 'many_to_one', 'required': False }
    schema_path = generate(item_paths,'bcc_surgery', output_dir='output/bcc', links=[link], callback=my_schema_callback)
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(schema_path)
    print(schema_path)

コード例 #5

0

ファイルを表示

                     output_dir=args.output_dir,
                     project_id=args.project_id)
    item_paths = [
        'source/bcc/lesion_size.json',
    ]
    link = {
        'name': 'observation',
        'backref': 'bcc_lesion',
        'label': 'describes',
        'target_type': 'observation',
        'multiplicity': 'many_to_one',
        'required': False
    }
    schema_path = generate(item_paths,
                           'bcc_lesion',
                           output_dir='output/bcc',
                           links=[link],
                           callback=my_schema_callback)
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(
        schema_path)
    print(schema_path)

    item_paths = [
        ('source/bcc/vWeightMonthly.json', 'Weight', my_callback),
        ('source/bcc/weight_ohsu.json', 'Weight', my_callback),
    ]
    transform_weight(item_paths,
                     observation_ids=observation_ids,
                     output_dir=args.output_dir,
                     project_id=args.project_id)
    item_paths = [

コード例 #6

0

ファイルを表示

def my_schema_callback(schema):
    """Normalize generated property names and tag the schema as a bcc extension.

    Mutates ``schema`` in place and returns the same object.  In order:

    * drops properties whose name starts with ``_``;
    * renames properties containing ``/`` to their second ``/``-separated
      segment;
    * replaces every ``*_id`` property (except ``project_id`` and
      ``submitter_id``) with a property named without the ``_id`` suffix;
    * sets ``category`` and adds the ``aliquot`` link property.

    Args:
        schema: dict with at least a ``'properties'`` mapping.

    Returns:
        The mutated ``schema``.
    """
    properties = schema['properties']

    for name in [name for name in properties if name.startswith('_')]:
        del properties[name]

    for name in [name for name in properties if '/' in name]:
        properties[name.split('/')[1]] = properties[name]
        del properties[name]

    suffix = '_id'
    for name in [name for name in properties if name.endswith(suffix)]:
        if name in ('project_id', 'submitter_id'):
            continue
        # Strip only the trailing suffix; str.replace would also remove any
        # '_id' embedded earlier in the name (e.g. 'sample_id_type_id').
        # NOTE(review): "'null'" carries embedded quotes, unlike the plain
        # 'null' used elsewhere — left unchanged, confirm what generate()
        # expects.
        properties[name[:-len(suffix)]] = {'type': ['string', "'null'"]}
        del properties[name]

    schema['category'] = 'bcc extention'
    properties['aliquot'] = {'$ref': '_definitions.yaml#/to_one'}
    return schema


if __name__ == "__main__":
    # Transform the sample source file, then generate the bcc_aliquot schema
    # from the same list of paths.
    item_paths = ['source/bcc/sample.json']
    args = default_parser(
        DEFAULT_OUTPUT_DIR,
        DEFAULT_EXPERIMENT_CODE,
        DEFAULT_PROJECT_ID,
    ).parse_args()
    transform(
        item_paths,
        output_dir=args.output_dir,
        experiment_code=args.experiment_code,
        callback=my_callback,
    )

    # Link description handed to generate() via `links`.
    link = dict(
        name='aliquot',
        backref='bcc_aliquot',
        label='derived_from',
        target_type='aliquot',
        multiplicity='many_to_one',
        required=False,
    )
    schema_path = generate(
        item_paths,
        'bcc_aliquot',
        output_dir='output/bcc',
        links=[link],
        callback=my_schema_callback,
    )
    assert os.path.isfile(schema_path), \
        'should have an schema file {}'.format(schema_path)
    print(schema_path)

コード例 #7

0

ファイルを表示

    if args.schema:

        def my_callback(schema):
            """Set the category and add the ``case`` property; returns ``schema``."""
            # NOTE(review): the property added here is 'case' while the link
            # below is named 'cases' — confirm which name is intended.
            schema['category'] = 'bcc extention'
            to_one_ref = {'$ref': '_definitions.yaml#/to_one'}
            schema['properties']['case'] = to_one_ref
            return schema

        item_paths = ['output/bcc/submitted_file.json']

        # Link description handed to generate() via `links`.
        link = dict(
            name='cases',
            backref='bcc_submitted_files',
            label='extends',
            target_type='case',
            multiplicity='many_to_one',
            required=False,
        )
        schema_path = generate(
            item_paths,
            'bcc_submitted_file',
            output_dir='output/bcc',
            links=[link],
            callback=my_callback,
        )
        assert os.path.isfile(schema_path), \
            'should have an schema file {}'.format(schema_path)
        print(schema_path)

コード例 #8

0

ファイルを表示


if __name__ == "__main__":
    # Transform both genetrails variant files, then generate the
    # genetrails_variant schema from the same list of paths.
    item_paths = [
        'source/bcc/sample_genetrails_copy_number_variant.json',
        'source/bcc/sample_genetrails_sequence_variant.json',
    ]
    args = default_parser(
        DEFAULT_OUTPUT_DIR,
        DEFAULT_EXPERIMENT_CODE,
        DEFAULT_PROJECT_ID,
    ).parse_args()
    transform(
        item_paths,
        output_dir=args.output_dir,
        experiment_code=args.experiment_code,
        callback=my_callback,
    )

    # Link description handed to generate() via `links`.
    link = dict(
        name='aliquot',
        backref='genetrails',
        label='derived_from',
        target_type='aliquot',
        multiplicity='many_to_one',
        required=False,
    )
    schema_path = generate(
        item_paths,
        'genetrails_variant',
        output_dir='output/bcc',
        links=[link],
        callback=my_schema_callback,
    )
    assert os.path.isfile(schema_path), \
        'should have an schema file {}'.format(schema_path)
    print(schema_path)

コード例 #9

0

ファイルを表示

    schema['properties']['aliquot'] = {'$ref': '_definitions.yaml#/to_one'}
    return schema


if __name__ == "__main__":
    # Step 1: transform the WES results source file.
    item_paths = ['source/bcc/WESResults.json']
    args = default_parser(
        DEFAULT_OUTPUT_DIR,
        DEFAULT_EXPERIMENT_CODE,
        DEFAULT_PROJECT_ID,
    ).parse_args()
    transform(
        item_paths,
        output_dir=args.output_dir,
        experiment_code=args.experiment_code,
        callback=my_callback,
    )

    # Step 2: generate the wes_result schema from the output file path.
    item_paths = ['output/bcc/wes_result.json']
    link = dict(
        name='aliquot',
        backref='wes_result',
        label='derived_from',
        target_type='aliquot',
        multiplicity='many_to_one',
        required=False,
    )
    schema_path = generate(
        item_paths,
        'wes_result',
        output_dir='output/bcc',
        links=[link],
        callback=my_schema_callback,
    )
    assert os.path.isfile(schema_path), \
        'should have an schema file {}'.format(schema_path)
    print(schema_path)

コード例 #10

0

ファイルを表示

        item_paths = [
            'output/bcc/bcc_biomarker.json',
        ]

        link = {
            'name': 'cases',
            'backref': 'bcc_biomarkers',
            'label': 'extends',
            'target_type': 'case',
            'multiplicity': 'many_to_one',
            'required': False
        }
        schema_path = generate(item_paths,
                               'bcc_biomarker',
                               output_dir='output/bcc',
                               links=[link],
                               callback=my_callback)
        assert os.path.isfile(
            schema_path), 'should have an schema file {}'.format(schema_path)
        print(schema_path)

# [
#   "CA19 Values After Specimen Collection",
#   "Date",
#   "ID_Event",
#   "Order Proc ID",
#   "Participant ID",
#   "_not_available_notes",
#   "_not_available_reason_id",
#   "assay version id",

コード例 #11

0

ファイルを表示

            # diagnosis['*last_known_disease_status'] = disease_status.get(line['Cancer Status'], 'Unknown tumor status')
            # diagnosis['*morphology'] = 'tumor_size={}'.format(line['Tumor Size']) # "None is not of type 'string'")
            # diagnosis['*primary_diagnosis'] = line['Case_ICD::Transformation']
            # diagnosis['*progression_or_recurrence'] = 'unknown' # ['yes', 'no', 'unknown', 'not reported', 'Not Allowed To Collect']
            # diagnosis['*site_of_resection_or_biopsy'] = 'unknown'
            # diagnosis['*tissue_or_organ_of_origin'] = 'pancrease'
            # diagnosis['*tumor_grade'] = 'unknown' #  "None is not of type 'string'")
            # diagnosis['*tumor_stage'] = 'unknown' #  "None is not of type 'string'")
            # diagnosis['*vital_status'] = 'unknown'
            #
            # diagnosis_emitter.write(diagnosis)

    cases_emitter.close()
    demographics_emitter.close()


if __name__ == "__main__":
    # Transform the two ccle Individual vertex files.
    item_paths = [
        'source/ccle/Individual.Vertex.json.gz',
        'source/ccle/maf.Individual.Vertex.json.gz',
    ]
    args = default_parser().parse_args()
    transform(item_paths=item_paths, output_dir=args.output_dir)

    # Schema generation is optional. Note it passes DEFAULT_OUTPUT_DIR as
    # output_dir, not args.output_dir.
    if args.schema:
        schema_path = generate(
            item_paths,
            'case',
            output_dir=DEFAULT_OUTPUT_DIR,
        )
        print(schema_path)

コード例 #12

0

ファイルを表示

    if 'chromosome' in line:
        line['chromosome'] = str(line['chromosome'].replace('chr',''))
    return line


def my_pre_processor(schema):
    """Normalize the property names of a generated schema.

    Mutates ``schema`` in place and returns the same object.  In order:

    * drops properties whose name starts with ``_``;
    * renames properties containing ``/`` to their second ``/``-separated
      segment;
    * replaces every ``*_id`` property (except ``submitter_id`` and
      ``project_id``) with a property named without the ``_id`` suffix.

    Args:
        schema: dict with at least a ``'properties'`` mapping.

    Returns:
        The mutated ``schema``.
    """
    properties = schema['properties']

    for name in [name for name in properties if name.startswith('_')]:
        del properties[name]

    for name in [name for name in properties if '/' in name]:
        properties[name.split('/')[1]] = properties[name]
        del properties[name]

    suffix = '_id'
    for name in [name for name in properties if name.endswith(suffix)]:
        if name in ('submitter_id', 'project_id'):
            continue
        # Strip only the trailing suffix; str.replace would also remove any
        # '_id' embedded earlier in the name (e.g. 'sample_id_type_id').
        # NOTE(review): "'null'" carries embedded quotes — left unchanged,
        # confirm what generate() expects.
        properties[name[:-len(suffix)]] = {'type': ['string', "'null'"]}
        del properties[name]

    return schema


if __name__ == "__main__":
    # Transform both genetrails variant files.
    item_paths = [
        'source/bcc/sample_genetrails_copy_number_variant.json',
        'source/bcc/sample_genetrails_sequence_variant.json',
    ]
    args = default_parser(
        DEFAULT_OUTPUT_DIR,
        DEFAULT_EXPERIMENT_CODE,
        DEFAULT_PROJECT_ID,
    ).parse_args()

    transform(
        item_paths,
        project_id=DEFAULT_PROJECT_ID,
        output_dir=args.output_dir,
        experiment_code=args.experiment_code,
        callback=my_callback,
    )

    # Schema generation is optional; the allele schema is generated from the
    # same list of paths.
    if args.schema:
        schema_path = generate(
            item_paths,
            'allele',
            output_dir='output/bcc',
            callback=my_pre_processor,
        )
        print(schema_path)

コード例 #13

0

ファイルを表示

                'answer': answer(row),
            }
            survey_emitter.write(survey)
    survey_emitter.close()


if __name__ == "__main__":
    # Transform the survey input, verify the output file exists, then
    # optionally generate the hop_survey schema.
    item_paths = [DEFAULT_INPUT_FILE]
    args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE, DEFAULT_PROJECT_ID).parse_args()

    transform(item_paths, output_dir=args.output_dir, experiment_code=args.experiment_code)

    p = os.path.join(args.output_dir, 'survey.json')
    assert os.path.isfile(p), 'should have an output file {}'.format(p)
    print(p)

    if args.schema:

        def my_callback(schema):
            """Set the schema category and add the ``cases`` link property.

            Mutates ``schema`` in place and returns the same object.
            """
            schema['category'] = 'hop extention'
            schema['properties']['cases'] = {'$ref': '_definitions.yaml#/to_one'}
            return schema

        # NOTE(review): this path is hard-coded while `p` above is built from
        # args.output_dir — confirm they are meant to be the same file.
        item_paths = ['output/hop/survey.json']

        link = {'name':'cases', 'backref':'hop_survey', 'label':'extends', 'target_type':'case',  'multiplicity': 'one_to_one', 'required': False }
        schema_path = generate(item_paths,'hop_survey', output_dir=args.output_dir, links=[link], callback=my_callback)
        # Check the schema file itself: the previous check re-tested the data
        # file `p`, so a missing schema file went unnoticed.
        assert os.path.isfile(schema_path), 'should have an schema file {}'.format(schema_path)
        print(schema_path)

コード例 #14

0

ファイルを表示

    print(p)

    if args.schema:

        def my_callback(schema):
            """Add ``source`` and ``case`` properties and set the category.

            Mutates ``schema`` in place and returns the same object.
            """
            schema['properties']['source'] = {'type': 'string'}
            schema['category'] = 'bcc extention'
            schema['properties']['case'] = {
                '$ref': '_definitions.yaml#/to_one'
            }
            return schema

        item_paths = ['output/bcc/bcc_participant.json']
        link = {
            'name': 'case',
            'backref': 'bcc_participants',
            'label': 'extends',
            'target_type': 'case',
            'multiplicity': 'many_to_one',
            'required': False
        }
        schema_path = generate(item_paths,
                               'bcc_participant',
                               output_dir='output/bcc',
                               links=[link],
                               callback=my_callback)
        # Check the schema file itself: the previous check tested the data
        # file `p`, so a missing schema file went unnoticed.
        assert os.path.isfile(schema_path), \
            'should have an schema file {}'.format(schema_path)
        print(schema_path)