def build_schema(self):
    """Return the superclass schema with the Cell detail fields filled in.

    The schema is first built by the parent resource; its ``'fields'``
    entry is then set to ``get_detail_schema(Cell(), ['cell'])``.
    """
    cell_schema = super(CellResource, self).build_schema()
    cell_schema['fields'] = get_detail_schema(Cell(), ['cell'])
    return cell_schema
def main(path):
    """
    Read in the Cell records from the 'HMS-LINCS cell line metadata' sheet.

    Every sheet row is turned into keyword arguments for ``Cell`` using the
    column definitions below, the Cell is saved, and a default CellBatch
    (batch_id 0) is created for it.

    Args:
        path: workbook location, passed through to ``iu.readtable``.

    Raises:
        Exception: 'Field is required ...' when a required column is still
            None after conversion and defaulting.
        Any exception raised while creating/saving a Cell or its batch is
        re-raised unchanged after the offending row has been printed.
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default

    # Each definition is either a bare model field name or a tuple laid out
    # in the order given by `properties`.
    properties = ('model_field','required','default','converter')
    column_definitions = {
        'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
        'CL_Name':('name',True),
        'CL_LINCS_ID':'lincs_id',
        'CL_Alternate_Name':'alternative_names',
        'CL_Alternate_ID':'alternate_id',
        'CL_Center_Specific_ID':'center_specific_id',
        'MGH_ID':('mgh_id',False,None,lambda x:util.convertdata(x,int)),
        'Assay':'assay',
        'CL_Organism':'organism',
        'CL_Organ':'organ',
        'CL_Tissue':'tissue',
        'CL_Cell_Type':'cell_type',
        'CL_Cell_Type_Detail':'cell_type_detail',
        'CL_Donor_Sex': 'donor_sex',
        'CL_Donor_Age': ('donor_age_years',False,None,lambda x:util.convertdata(x,int)),
        'CL_Donor_Ethnicity': 'donor_ethnicity',
        'CL_Donor_Health_Status': 'donor_health_status',
        'CL_Disease':'disease',
        'CL_Disease_Detail':'disease_detail',
        'CL_Growth_Properties':'growth_properties',
        'CL_Genetic_Modification':'genetic_modification',
        'CL_Related_Projects':'related_projects',
        'CL_Recommended_Culture_Conditions':'recommended_culture_conditions',
        'CL_Verification_Reference_Profile':'verification_reference_profile',
        'CL_Known_Mutations':'mutations_known',
        'CL_Mutations_Citations':'mutations_citations',
        'CL_Molecular_Features': 'molecular_features',
        'CL_Relevant_Citations': 'relevant_citations',
        'CL_Reference_Source': 'reference_source',
        'CL_Reference_Source_ID': 'reference_source_id',
        'Reference Source URL': 'reference_source_url',
        'Usage Note': 'usage_note',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
        all_sheet_columns_required=False)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            # Converters only see real values, so an empty required cell
            # reaches the explicit 'Field is required' check below instead
            # of failing inside the converter (e.g. the Facility ID slice).
            if converter is not None and value is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                    % (column['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = Cell(**initializer)
            cell.save()
            logger.info(str(('cell created:', cell)))
            rows += 1

            # create a default batch - 0
            CellBatch.objects.create(reagent=cell,batch_id=0)

        except Exception:
            # Single pre-formatted argument parses as a Python 2 print
            # statement and as a Python 3 print call; the double space is
            # kept for output compatibility.
            print("Invalid Cell, name:  %s" % (r[0],))
            # Bare raise keeps the original traceback.
            raise
def main(path):
    """
    Read in the Cell records from the 'HMS-LINCS cell line metadata' sheet.

    Every sheet row is turned into keyword arguments for ``Cell`` using the
    column definitions below, and the resulting Cell is saved.

    Args:
        path: workbook location, passed through to ``iu.readtable``.

    Raises:
        Exception: 'Field is required ...' when a required column is still
            None after conversion and defaulting.
        Any exception raised while creating/saving a Cell is re-raised
        unchanged after the offending row has been printed.
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default

    # Each definition is either a bare model field name or a tuple laid out
    # in the order given by `properties`.
    properties = ('model_field','required','default','converter')
    column_definitions = {
        'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
        'CL_Name':('name',True),
        'CL_ID':'cl_id',
        'CL_Alternate_Name':'alternate_name',
        'CL_Alternate_ID':'alternate_id',
        'CL_Center_Name':'center_name',
        'CL_Center_Specific_ID':'center_specific_id',
        'MGH_ID':('mgh_id',False,None,lambda x:util.convertdata(x,int)),
        'Assay':'assay',
        'CL_Provider_Name':'provider_name',
        'CL_Provider_Catalog_ID':'provider_catalog_id',
        'CL_Batch_ID':'batch_id',
        'CL_Organism':'organism',
        'CL_Organ':'organ',
        'CL_Tissue':'tissue',
        'CL_Cell_Type':'cell_type',
        'CL_Cell_Type_Detail':'cell_type_detail',
        'CL_Disease':'disease',
        'CL_Disease_Detail':'disease_detail',
        'CL_Growth_Properties':'growth_properties',
        'CL_Genetic_Modification':'genetic_modification',
        'CL_Related_Projects':'related_projects',
        'CL_Recommended_Culture_Conditions':'recommended_culture_conditions',
        'CL_Verification_Profile':'verification_profile',
        'CL_Verification_Reference_Profile':'verification_reference_profile',
        'CL_Mutations_Reference':'mutations_reference',
        'CL_Mutations_Explicit':'mutations_explicit',
        'CL_Organism_Gender':'organism_gender',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            # Converters only see real values, so an empty required cell
            # reaches the explicit 'Field is required' check below instead
            # of failing inside the converter (e.g. the Facility ID slice).
            if converter is not None and value is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                    % (column['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = Cell(**initializer)
            cell.save()
            logger.info(str(('cell created:', cell)))
            rows += 1
        except Exception:
            # Single pre-formatted argument parses as a Python 2 print
            # statement and as a Python 3 print call; the double space is
            # kept for output compatibility.
            print("Invalid Cell, name:  %s" % (r[0],))
            # Bare raise keeps the original traceback.
            raise
def main(path, do_precursors_only):
    """
    Read in the Cell records from the 'HMS-LINCS cell line metadata' sheet.

    Every sheet row is turned into keyword arguments for ``Cell`` using the
    column definitions below. The 'Precursor_Cell' value is removed from
    those arguments, validated against ``HMSL(5\\d{4})-(\\d+)`` and recorded
    in ``precursor_map`` keyed by the row's facility_id. Unless
    ``do_precursors_only`` is true, the Cell is saved and a default
    CellBatch (batch_id 0) is created for it.

    NOTE(review): nothing in this span reads ``precursor_map`` back;
    presumably it is consumed by code that follows -- confirm.

    Args:
        path: workbook location, passed through to ``iu.readtable``.
        do_precursors_only: when true, rows are parsed and precursors are
            collected, but no Cell/CellBatch is created.

    Raises:
        Exception: 'Field is required ...' when a required column is still
            None after conversion and defaulting, or 'Invalid precursor
            pattern ...' when a precursor id does not match the pattern.
        Converter and save errors are logged/printed and re-raised
        unchanged.
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1]) # allow for informational header row

    # Each definition is either a bare model field name or a tuple laid out
    # in the order given by `properties`.
    properties = ('model_field','required','default','converter')
    column_definitions = {
        'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
        'CL_Name':('name',True),
        'CL_LINCS_ID':'lincs_id',
        'CL_Alternate_Name':'alternative_names',
        'CL_Alternate_ID':'alternative_id',
        'Precursor_Cell':'precursor_facility_batch_id',
        'CL_Organism':'organism',
        'CL_Organ':'organ',
        'CL_Tissue':'tissue',
        'CL_Cell_Type':'cell_type',
        'CL_Cell_Type_Detail':'cell_type_detail',
        'CL_Donor_Sex': 'donor_sex',
        'CL_Donor_Age': ('donor_age_years',False,None,lambda x:util.convertdata(x,int)),
        'CL_Donor_Ethnicity': 'donor_ethnicity',
        'CL_Donor_Health_Status': 'donor_health_status',
        'CL_Disease':'disease',
        'CL_Disease_Detail':'disease_detail',
        'CL_Production_Details': 'production_details',
        'CL_Genetic_Modification':'genetic_modification',
        'CL_Known_Mutations':'mutations_known',
        'CL_Mutation_Citations':'mutation_citations',
        'CL_Verification_Reference_Profile':'verification_reference_profile',
        'CL_Growth_Properties':'growth_properties',
        'CL_Recommended_Culture_Conditions':'recommended_culture_conditions',
        'CL_Relevant_Citations': 'relevant_citations',
        'Usage Note': 'usage_note',
        'CL_Reference_Source': 'reference_source',
        'Reference Source URL': 'reference_source_url',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
        all_sheet_columns_required=False)

    rows = 0
    precursor_map = {}
    precursor_pattern = re.compile(r'HMSL(5\d{4})-(\d+)')
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            value = convertdata(value)
            if value is not None and converter:
                try:
                    value = converter(value)
                except Exception:
                    # `value` is still the unconverted input here.
                    logger.error('field parse error: %r, value: %r, row: %d',
                        column['column_label'],value,rows+2)
                    raise
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                    % (column['column_label'],rows))

            logger.debug('model_field: %r, value: %r' , model_field, value)
            initializer[model_field] = value

        # The precursor id is not a Cell constructor argument, so it is
        # always removed; the None default covers rows where the
        # 'Precursor_Cell' column was not mapped.
        precursor_facility_batch_id = initializer.pop(
            'precursor_facility_batch_id', None)
        if precursor_facility_batch_id:
            match = precursor_pattern.match(precursor_facility_batch_id)
            if not match:
                # Report the pattern text, not the compiled object's repr.
                raise Exception('Invalid precursor pattern: needs: %s: %r, row: %d'
                    % (precursor_pattern.pattern, initializer, rows))
            precursor_map[initializer['facility_id']] = (match.group(1),match.group(2))

        if not do_precursors_only:
            try:
                logger.info('initializer: %r', initializer)
                cell = Cell(**initializer)
                cell.save()
                logger.info(str(('cell created:', cell)))

                # create a default batch - 0
                CellBatch.objects.create(reagent=cell,batch_id=0)

            except Exception:
                # Single pre-formatted argument parses as a Python 2 print
                # statement and as a Python 3 print call; the double space
                # is kept for output compatibility.
                print("Invalid Cell, name:  %s" % (r[0],))
                # Bare raise keeps the original traceback.
                raise
        rows += 1