print(p)

if args.schema:

    def my_callback(schema):
        """Tag the generated schema as a HOP extension with a ``cases`` link.

        Sets ``category`` and adds a ``cases`` property that references
        ``_definitions.yaml#/to_one``. The schema is modified in place and
        returned.
        """
        schema['category'] = 'hop extention'
        schema['properties']['cases'] = {
            '$ref': '_definitions.yaml#/to_one'
        }
        return schema

    item_paths = ['output/hop/file.json']
    # Link description handed to generate() together with the callback.
    link = {
        'name': 'cases',
        'backref': 'submitted_file',
        'label': 'extends',
        'target_type': 'case',
        'multiplicity': 'one_to_one',
        'required': False
    }
    schema_path = generate(item_paths,
                           'submitted_file',
                           output_dir=args.output_dir,
                           links=[link],
                           callback=my_callback)
    # Verify the schema file that was just generated. The previous check
    # tested the data file ``p`` and so could never catch a missing schema.
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(
        schema_path)
    print(schema_path)
assert os.path.isfile(p), 'should have an output file {}'.format(p)
print(p)

if args.schema:

    def my_callback(schema):
        """Add the BCC demographic extension fields to a generated schema.

        Adds a string ``source`` property, sets ``category`` and adds a
        ``demographic`` property referencing ``_definitions.yaml#/to_one``.
        The schema is modified in place and returned.
        """
        schema['properties']['source'] = {'type': 'string'}
        schema['category'] = 'bcc extention'
        schema['properties']['demographic'] = {
            '$ref': '_definitions.yaml#/to_one'
        }
        return schema

    # ``item_paths`` is reused from the transform step earlier in the script.
    link = {
        'name': 'demographic',
        'backref': 'bcc_demographic',
        'label': 'extends',
        'target_type': 'demographic',
        'multiplicity': 'many_to_one',
        'required': False
    }
    schema_path = generate(item_paths,
                           'bcc_demographic',
                           output_dir='output/bcc',
                           links=[link],
                           callback=my_callback)
    # Verify the generated schema file, not the data file ``p`` (which was
    # already checked above and says nothing about the schema).
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(
        schema_path)
    print(schema_path)
if args.schema:

    def my_callback(schema):
        """Add the BCC diagnosis extension fields to a generated schema.

        Adds a string ``source`` property and a nullable-string
        ``diagnosis_name`` property, sets ``category`` and adds a
        ``diagnosis`` property referencing ``_definitions.yaml#/to_one``.
        The schema is modified in place and returned.
        """
        schema['properties']['source'] = {'type': 'string'}
        schema['properties']['diagnosis_name'] = {
            'type': ['string', 'null']
        }
        schema['category'] = 'bcc extention'
        schema['properties']['diagnosis'] = {
            '$ref': '_definitions.yaml#/to_one'
        }
        return schema

    # ``item_paths`` is reused from earlier in the script.
    link = {
        'name': 'diagnosis',
        'backref': 'bcc_diagnosis',
        'label': 'extends',
        'target_type': 'diagnosis',
        'multiplicity': 'many_to_one',
        'required': False
    }
    schema_path = generate(item_paths,
                           'bcc_diagnosis',
                           output_dir='output/bcc',
                           links=[link],
                           callback=my_callback)
    # Verify the generated schema file; the previous check tested the data
    # file ``p`` while reporting ``schema_path`` in its message.
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(
        schema_path)
    print(schema_path)
('source/bcc/voncologsurgery.json', 'Surgery', None), ('source/bcc/Radiotherapy.json', 'Radiotherapy', None) ] treatment_ids = transform_gen3(item_paths, output_dir=args.output_dir, project_id=args.project_id) # print('\n'.join(treatment_ids)) item_paths = [ ('source/bcc/treatment_chemotherapy_ohsu.json', 'Chemotherapy', my_callback), ('source/bcc/treatment_chemotherapy_manually_entered.json', 'Chemotherapy', my_callback), ] transform_chemotherapy(item_paths, treatment_ids=treatment_ids, output_dir=args.output_dir, project_id=args.project_id) item_paths = [ 'output/bcc/bcc_chemotherapy.json', ] link = {'name':'treatment', 'backref':'bcc_chemotherapy', 'label':'describes', 'target_type':'treatment', 'multiplicity': 'many_to_one', 'required': False } schema_path = generate(item_paths,'bcc_chemotherapy', output_dir='output/bcc', links=[link], callback=my_schema_callback) assert os.path.isfile(schema_path), 'should have an schema file {}'.format(schema_path) print(schema_path) item_paths = [ ('source/bcc/vResectionDate.json', 'Surgery', my_callback), ('source/bcc/voncologsurgery.json', 'Surgery', my_callback), ] transform_surgery(item_paths, treatment_ids=treatment_ids, output_dir=args.output_dir, project_id=args.project_id) item_paths = [ 'output/bcc/bcc_surgery.json', ] link = {'name':'treatment', 'backref':'bcc_surgery', 'label':'describes', 'target_type':'treatment', 'multiplicity': 'many_to_one', 'required': False } schema_path = generate(item_paths,'bcc_surgery', output_dir='output/bcc', links=[link], callback=my_schema_callback) assert os.path.isfile(schema_path), 'should have an schema file {}'.format(schema_path) print(schema_path)
output_dir=args.output_dir, project_id=args.project_id) item_paths = [ 'source/bcc/lesion_size.json', ] link = { 'name': 'observation', 'backref': 'bcc_lesion', 'label': 'describes', 'target_type': 'observation', 'multiplicity': 'many_to_one', 'required': False } schema_path = generate(item_paths, 'bcc_lesion', output_dir='output/bcc', links=[link], callback=my_schema_callback) assert os.path.isfile(schema_path), 'should have an schema file {}'.format( schema_path) print(schema_path) item_paths = [ ('source/bcc/vWeightMonthly.json', 'Weight', my_callback), ('source/bcc/weight_ohsu.json', 'Weight', my_callback), ] transform_weight(item_paths, observation_ids=observation_ids, output_dir=args.output_dir, project_id=args.project_id) item_paths = [
def my_schema_callback(schema):
    """Normalize generated property names and tag the schema as a BCC extension.

    The following steps are applied, in order, to ``schema['properties']``:

    1. Every property whose name starts with ``_`` is removed.
    2. Every property whose name contains ``/`` is renamed to its second
       ``/``-separated segment.
    3. Every property whose name ends with ``_id`` (except ``project_id``
       and ``submitter_id``) is replaced by a property named without that
       suffix.

    Finally ``category`` is set and an ``aliquot`` property referencing
    ``_definitions.yaml#/to_one`` is added.

    Args:
        schema: dict with a ``properties`` mapping; modified in place.

    Returns:
        The same ``schema`` object.
    """
    properties = schema['properties']
    # Each loop iterates over a snapshot list because it mutates the mapping.
    for k in [k for k in properties if k.startswith('_')]:
        del properties[k]
    for k in [k for k in properties if '/' in k]:
        properties[k.split('/')[1]] = properties[k]
        del properties[k]
    for k in [k for k in properties if k.endswith('_id')]:
        if k in ('project_id', 'submitter_id'):
            continue
        # Strip only the trailing '_id'. str.replace('_id', '') would also
        # remove the sequence from the middle of a name such as
        # 'case_id_alt_id'.
        # NOTE(review): the second type is the literal string "'null'",
        # quotes included. Kept as-is; confirm the schema writer needs the
        # embedded quotes.
        properties[k[:-len('_id')]] = {'type': ['string', "'null'"]}
        del properties[k]
    schema['category'] = 'bcc extention'
    properties['aliquot'] = {'$ref': '_definitions.yaml#/to_one'}
    return schema


if __name__ == "__main__":
    item_paths = ['source/bcc/sample.json']
    args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE,
                          DEFAULT_PROJECT_ID).parse_args()
    transform(item_paths, output_dir=args.output_dir,
              experiment_code=args.experiment_code, callback=my_callback)

    # Link description handed to generate() together with the callback.
    link = {
        'name': 'aliquot',
        'backref': 'bcc_aliquot',
        'label': 'derived_from',
        'target_type': 'aliquot',
        'multiplicity': 'many_to_one',
        'required': False
    }
    schema_path = generate(item_paths, 'bcc_aliquot', output_dir='output/bcc',
                           links=[link], callback=my_schema_callback)
    assert os.path.isfile(schema_path), \
        'should have an schema file {}'.format(schema_path)
    print(schema_path)
if args.schema:

    def my_callback(schema):
        """Mark the schema as a BCC extension and add its ``case`` reference.

        The schema is modified in place and returned.
        """
        properties = schema['properties']
        schema['category'] = 'bcc extention'
        properties['case'] = {'$ref': '_definitions.yaml#/to_one'}
        return schema

    item_paths = ['output/bcc/submitted_file.json']
    # NOTE(review): the callback adds a property named 'case' while the link
    # below is named 'cases'; confirm which spelling generate() expects.
    link = dict(
        name='cases',
        backref='bcc_submitted_files',
        label='extends',
        target_type='case',
        multiplicity='many_to_one',
        required=False,
    )
    schema_path = generate(
        item_paths,
        'bcc_submitted_file',
        output_dir='output/bcc',
        links=[link],
        callback=my_callback,
    )
    assert os.path.isfile(schema_path), \
        'should have an schema file {}'.format(schema_path)
    print(schema_path)
if __name__ == "__main__":
    # Both GeneTrails exports go through the same transform and schema.
    item_paths = [
        'source/bcc/sample_genetrails_copy_number_variant.json',
        'source/bcc/sample_genetrails_sequence_variant.json',
    ]
    args = default_parser(
        DEFAULT_OUTPUT_DIR,
        DEFAULT_EXPERIMENT_CODE,
        DEFAULT_PROJECT_ID,
    ).parse_args()
    transform(
        item_paths,
        output_dir=args.output_dir,
        experiment_code=args.experiment_code,
        callback=my_callback,
    )

    # Link description handed to generate() together with the callback.
    link = dict(
        name='aliquot',
        backref='genetrails',
        label='derived_from',
        target_type='aliquot',
        multiplicity='many_to_one',
        required=False,
    )
    schema_path = generate(
        item_paths,
        'genetrails_variant',
        output_dir='output/bcc',
        links=[link],
        callback=my_schema_callback,
    )
    assert os.path.isfile(schema_path), \
        'should have an schema file {}'.format(schema_path)
    print(schema_path)
schema['properties']['aliquot'] = {'$ref': '_definitions.yaml#/to_one'} return schema if __name__ == "__main__": item_paths = ['source/bcc/WESResults.json'] args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE, DEFAULT_PROJECT_ID).parse_args() transform(item_paths, output_dir=args.output_dir, experiment_code=args.experiment_code, callback=my_callback) item_paths = ['output/bcc/wes_result.json'] link = { 'name': 'aliquot', 'backref': 'wes_result', 'label': 'derived_from', 'target_type': 'aliquot', 'multiplicity': 'many_to_one', 'required': False } schema_path = generate(item_paths, 'wes_result', output_dir='output/bcc', links=[link], callback=my_schema_callback) assert os.path.isfile(schema_path), 'should have an schema file {}'.format( schema_path) print(schema_path)
item_paths = ['output/bcc/bcc_biomarker.json']
# Link description handed to generate() together with the callback.
link = dict(
    name='cases',
    backref='bcc_biomarkers',
    label='extends',
    target_type='case',
    multiplicity='many_to_one',
    required=False,
)
schema_path = generate(
    item_paths,
    'bcc_biomarker',
    output_dir='output/bcc',
    links=[link],
    callback=my_callback,
)
assert os.path.isfile(schema_path), \
    'should have an schema file {}'.format(schema_path)
print(schema_path)

# [
#     "CA19 Values After Specimen Collection",
#     "Date",
#     "ID_Event",
#     "Order Proc ID",
#     "Participant ID",
#     "_not_available_notes",
#     "_not_available_reason_id",
#     "assay version id",
# diagnosis['*last_known_disease_status'] = disease_status.get(line['Cancer Status'], 'Unknown tumor status') # diagnosis['*morphology'] = 'tumor_size={}'.format(line['Tumor Size']) # "None is not of type 'string'") # diagnosis['*primary_diagnosis'] = line['Case_ICD::Transformation'] # diagnosis['*progression_or_recurrence'] = 'unknown' # ['yes', 'no', 'unknown', 'not reported', 'Not Allowed To Collect'] # diagnosis['*site_of_resection_or_biopsy'] = 'unknown' # diagnosis['*tissue_or_organ_of_origin'] = 'pancrease' # diagnosis['*tumor_grade'] = 'unknown' # "None is not of type 'string'") # diagnosis['*tumor_stage'] = 'unknown' # "None is not of type 'string'") # diagnosis['*vital_status'] = 'unknown' # # diagnosis_emitter.write(diagnosis) cases_emitter.close() demographics_emitter.close() if __name__ == "__main__": item_paths = [ 'source/ccle/Individual.Vertex.json.gz', 'source/ccle/maf.Individual.Vertex.json.gz' ] args = default_parser().parse_args() transform(item_paths=item_paths, output_dir=args.output_dir) # glob.glob("output/bcc/*.json") if args.schema: schema_path = generate(item_paths, 'case', output_dir=DEFAULT_OUTPUT_DIR) print(schema_path)
if 'chromosome' in line: line['chromosome'] = str(line['chromosome'].replace('chr','')) return line def my_pre_processor(schema): """Remove fields that start with _, fix key names with embedded /, fix id lookups """ for k in [k for k in schema['properties'] if k.startswith('_')]: del schema['properties'][k] for k in [k for k in schema['properties'] if '/' in k]: schema['properties'][k.split('/')[1]] = schema['properties'][k] del schema['properties'][k] for k in [k for k in schema['properties'] if k.endswith('_id')]: if k in ['submitter_id', 'project_id']: continue schema['properties'][k.replace('_id', '')] = {'type': ['string', "'null'"]} # schema['properties'][k] del schema['properties'][k] return schema if __name__ == "__main__": item_paths = ['source/bcc/sample_genetrails_copy_number_variant.json','source/bcc/sample_genetrails_sequence_variant.json'] args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE, DEFAULT_PROJECT_ID).parse_args() transform(item_paths, project_id=DEFAULT_PROJECT_ID, output_dir=args.output_dir, experiment_code=args.experiment_code, callback=my_callback) # glob.glob("output/bcc/*.json") if args.schema: schema_path = generate(item_paths,'allele', output_dir='output/bcc', callback=my_pre_processor) print(schema_path)
'answer': answer(row), } survey_emitter.write(survey) survey_emitter.close() if __name__ == "__main__": item_paths = [DEFAULT_INPUT_FILE] args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE, DEFAULT_PROJECT_ID).parse_args() transform(item_paths, output_dir=args.output_dir, experiment_code=args.experiment_code) p = os.path.join(args.output_dir, 'survey.json') assert os.path.isfile(p), 'should have an output file {}'.format(p) print(p) if args.schema: def my_callback(schema): # adds the source property schema['category'] = 'hop extention' schema['properties']['cases'] = {'$ref': '_definitions.yaml#/to_one'} return schema item_paths = ['output/hop/survey.json'] link = {'name':'cases', 'backref':'hop_survey', 'label':'extends', 'target_type':'case', 'multiplicity': 'one_to_one', 'required': False } schema_path = generate(item_paths,'hop_survey', output_dir=args.output_dir, links=[link], callback=my_callback) assert os.path.isfile(p), 'should have an schema file {}'.format(schema_path) print(schema_path)
print(p)

if args.schema:

    def my_callback(schema):
        """Add the BCC participant extension fields to a generated schema.

        Adds a string ``source`` property, sets ``category`` and adds a
        ``case`` property referencing ``_definitions.yaml#/to_one``. The
        schema is modified in place and returned.
        """
        schema['properties']['source'] = {'type': 'string'}
        schema['category'] = 'bcc extention'
        schema['properties']['case'] = {
            '$ref': '_definitions.yaml#/to_one'
        }
        return schema

    item_paths = ['output/bcc/bcc_participant.json']
    # Link description handed to generate() together with the callback.
    link = {
        'name': 'case',
        'backref': 'bcc_participants',
        'label': 'extends',
        'target_type': 'case',
        'multiplicity': 'many_to_one',
        'required': False
    }
    schema_path = generate(item_paths,
                           'bcc_participant',
                           output_dir='output/bcc',
                           links=[link],
                           callback=my_callback)
    # Verify the generated schema file; the previous check tested the data
    # file ``p`` while reporting ``schema_path`` in its message.
    assert os.path.isfile(schema_path), 'should have an schema file {}'.format(
        schema_path)
    print(schema_path)