'submitter_id': case_submitter_id }, 'submitter_id': submitter_id, 'project_id': DEFAULT_PROJECT_ID, 'data_type': row['c_file.mime'], 'md5sum': row['c_file.ETag'], 'file_size': row['c_file.size'], 'file_name': row['c_file.path'], } file_emitter.write(file) file_emitter.close() if __name__ == "__main__": item_paths = [DEFAULT_INPUT_FILE] args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE, DEFAULT_PROJECT_ID).parse_args() transform(item_paths, output_dir=args.output_dir, experiment_code=args.experiment_code) p = os.path.join(args.output_dir, 'submitted_file.json') assert os.path.isfile(p), 'should have an output file {}'.format(p) print(p) if args.schema: def my_callback(schema): # adds the source property schema['category'] = 'hop extention' schema['properties']['cases'] = {
# diagnosis['*last_known_disease_status'] = disease_status.get(line['Cancer Status'], 'Unknown tumor status') # diagnosis['*morphology'] = 'tumor_size={}'.format(line['Tumor Size']) # "None is not of type 'string'") # diagnosis['*primary_diagnosis'] = line['Case_ICD::Transformation'] # diagnosis['*progression_or_recurrence'] = 'unknown' # ['yes', 'no', 'unknown', 'not reported', 'Not Allowed To Collect'] # diagnosis['*site_of_resection_or_biopsy'] = 'unknown' # diagnosis['*tissue_or_organ_of_origin'] = 'pancrease' # diagnosis['*tumor_grade'] = 'unknown' # "None is not of type 'string'") # diagnosis['*tumor_stage'] = 'unknown' # "None is not of type 'string'") # diagnosis['*vital_status'] = 'unknown' # # diagnosis_emitter.write(diagnosis) cases_emitter.close() demographics_emitter.close()
# NOTE(review): the line above appears to be the whitespace-collapsed tail of a
# function whose beginning lies outside this chunk; it is reproduced unchanged
# because its original nesting cannot be determined from here.


# Script entry point: run the transform over the two CCLE "Individual" vertex
# dumps and, when the parsed arguments carry a truthy ``schema`` attribute,
# call generate() for the 'case' type and print whatever it returns.
if __name__ == "__main__":
    item_paths = [
        'source/ccle/Individual.Vertex.json.gz',
        'source/ccle/maf.Individual.Vertex.json.gz',
    ]
    args = default_parser().parse_args()

    transform(item_paths=item_paths, output_dir=args.output_dir)

    # An earlier note here mentioned glob.glob("output/bcc/*.json"); presumably
    # a former way of collecting item paths -- TODO confirm and drop if stale.
    if args.schema:
        # NOTE(review): generate() is handed DEFAULT_OUTPUT_DIR while
        # transform() above uses args.output_dir -- confirm this is intended.
        print(generate(item_paths, 'case', output_dir=DEFAULT_OUTPUT_DIR))