def readLibraries(path, sheetName):
    """
    Read the library sheet and create one Library record per row.

    Args:
        path: workbook path, handed to iu.readtable.
        sheetName: name of the sheet that holds the library definitions.

    Returns:
        dict mapping the short_name of every saved Library to its instance.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving a Library are logged and
            re-raised unchanged.
    """
    sheet = iu.readtable([path, sheetName])  # Note, skipping the header row by default

    # dict to map spreadsheet fields to the Library fields
    properties = ('model_field','required','default','converter')

    def date_parser(x):
        return util.convertdata(x,date)

    column_definitions = {
        'Name': ('name',True),  # TODO use the model to determine if req'd
        'ShortName': ('short_name',True),
        'Library Type':'type',
        'Date First Plated': ('date_first_plated',False,None,date_parser),
        'Date Data Received':('date_data_received',False,None,date_parser),
        'Date Loaded': ('date_loaded',False,None,date_parser),
        'Date Publicly Available': ('date_publicly_available',False,None,date_parser),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False),
    }
    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    libraries = {}
    for row in sheet:
        logger.debug(str(('row raw: ',row)))
        r = util.make_row(row)
        logger.debug(str(('row: ',r)))
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            library = Library(**initializer)
            library.save()
            logger.info(str(('library created', library)))
            libraries[library.short_name] = library
            rows += 1
        except Exception:
            logger.error(str(('library initializer problem: ', initializer)))
            # bare raise keeps the original traceback
            raise

    # callers look libraries up by short_name, so hand the mapping back
    return libraries
def main(path):
    """
    Read in the OtherReagent

    Reads sheet 'Sheet1' of the workbook at `path` and saves one
    OtherReagent per row.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving an OtherReagent are logged and
            re-raised unchanged.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1])  # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'OR_ID': 'lincs_id',
        'Facility ID': ('facility_id',True),
        'OR_Alternate_ID': 'alternate_id',
        'OR_Primary_Name': ('name',True),
        'OR_Alternate_Name': 'alternative_names',
        'OR_Role': 'role',
        'OR_Reference': 'reference',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False),
    }

    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            reagent = OtherReagent(**initializer)
            reagent.save()
            logger.info(str(('OtherReagent created: ', reagent)))
            rows += 1
        except Exception:
            logger.error(str(( "Invalid OtherReagent initializer: ", initializer)))
            raise
def main(path):
    """
    Read in the Protein

    Reads sheet 'HMS-LINCS Kinases' of the workbook at `path` and saves one
    Protein per row.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving a Protein are logged and
            re-raised unchanged.
    """
    sheet_name = 'HMS-LINCS Kinases'
    # Note, skipping the header row by default
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'PP_Name':('name',True),
        # keep only the text that follows the 'HMSL' prefix
        'PP_LINCS_ID':('lincs_id',True,None,lambda x: x[x.index('HMSL')+4:]),
        'PP_UniProt_ID':'uniprot_id',
        'PP_Alternate_Name':'alternate_name',
        'PP_Alternate_Name[2]':'alternate_name_2',
        'PP_Provider':'provider',
        'PP_Provider_Catalog_ID':'provider_catalog_id',
        'PP_Batch_ID':'batch_id',
        'PP_Amino_Acid_Sequence':'amino_acid_sequence',
        'PP_Gene_Symbol':'gene_symbol',
        'PP_Gene_ID':'gene_id',
        'PP_Protein_Source':'protein_source',
        'PP_Protein_Form':'protein_form',
        'PP_Mutation':'mutation',
        # NOTE(review): spelling presumably matches the model field - confirm
        'PP_Phosphorylation_State':'phosphlorylation',
        'PP_Domain':'protein_domain',
        'PP_Protein_Purity':'protein_purity',
        'PP_Protein_Complex':'protein_complex',
        'PP_Isoform':'isoform',
        'PP_Protein_Type':'protein_type',
        'PP_Source_Organism':'source_organism',
        'PP_Reference':'reference',
        'Date Data Received':('date_data_received',False,None,
                              util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,
                                    util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False),
    }
    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = \
        util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug(str((
                'model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            protein = Protein(**initializer)
            protein.save()
            logger.info(str(('protein created: ', protein)))
            rows += 1
        except Exception as e:
            logger.error(str(("Invalid protein initializer: ", initializer, e)))
            raise
def main(path):
    """
    Read in the Protein

    Reads sheet 'HMS-LINCS Kinases' of the workbook at `path`, saves one
    Protein per row (with lincs_id copied from facility_id) and creates a
    ProteinBatch with batch_id 0 for each of them.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving the records are logged and
            re-raised unchanged.
    """
    sheet_name = 'HMS-LINCS Kinases'
    # Note, skipping the header row by default
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'PP_Name':('name',True),
        # keep only the text that follows the 'HMSL' prefix
        'PP_LINCS_ID':('facility_id',True,None,lambda x: x[x.index('HMSL')+4:]),
        'PP_UniProt_ID':'uniprot_id',
        'PP_Alternate_Name':'alternative_names',
        'PP_Alternate_Name[2]':'alternate_name_2',
        'PP_Provider':'provider',
        'PP_Provider_Catalog_ID':'provider_catalog_id',
        'PP_Batch_ID':'batch_id',
        'PP_Amino_Acid_Sequence':'amino_acid_sequence',
        'PP_Gene_Symbol':'gene_symbol',
        'PP_Gene_ID':'gene_id',
        'PP_Protein_Source':'protein_source',
        'PP_Protein_Form':'protein_form',
        'PP_Mutation':'mutation',
        # NOTE(review): spelling presumably matches the model field - confirm
        'PP_Phosphorylation_State':'phosphlorylation',
        'PP_Domain':'protein_domain',
        'PP_Protein_Purity':'protein_purity',
        'PP_Protein_Complex':'protein_complex',
        'PP_Isoform':'isoform',
        'PP_Protein_Type':'protein_type',
        'PP_Source_Organism':'source_organism',
        'PP_Reference':'reference',
        'Date Data Received':('date_data_received',False,None,
                              util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,
                                    util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False),
    }
    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = \
        util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug(str((
                'model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            protein = Protein(**initializer)
            # FIXME: LINCS IDS for Protein
            protein.lincs_id = protein.facility_id
            protein.save()
            logger.info(str(('protein created: ', protein)))
            rows += 1

            # create a default batch - 0
            ProteinBatch.objects.create(reagent=protein,batch_id=0)
        except Exception as e:
            logger.error(str(("Invalid protein initializer: ", initializer, e)))
            raise
def main(path):
    """
    Read in the Library and LibraryMapping sheets

    Libraries are loaded through readLibraries; each row of the
    'LibraryMapping' sheet is then converted and its SmallMolecule
    (facility_id + salt_id) and Library (short_name) references resolved.

    NOTE(review): as written, this function only resolves the references
    into `initializer`; nothing built from a mapping row is saved here.
    Confirm whether a persistence step is missing.

    Raises:
        Exception: when a required column has no value, when no
            SmallMolecule matches the facility/salt ids, or when the
            library short name is not among the libraries read.
    """
    libraries = readLibraries(path,'Library')

    sheet = iu.readtable([path, 'LibraryMapping'])
    properties = ('model_field','required','default','converter')
    column_definitions = {
        'Facility':('facility_id',False,None, lambda x: util.convertdata(x,int)),
        'Salt':('salt_id',False,None, lambda x: util.convertdata(x,int)),
        'Batch':('facility_batch_id',False,None, lambda x: util.convertdata(x,int)),
        'Is Control':('is_control',False,False,util.bool_converter),
        'Plate':('plate',False,None, lambda x: util.convertdata(x,int)),
        'Well':'well',
        'Library Name':'short_name',
        'Concentration': 'concentration',
        'Concentration Unit':'concentration_unit',
    }
    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    # NOTE(review): these two tuples are not referenced anywhere in this
    # function; presumably field lists for a lookup/creation step - confirm.
    small_molecule_batch_lookup = ('smallmolecule', 'facility_batch_id')
    library_mapping_lookup = ('smallmolecule_batch','library','is_control','plate','well','concentration','concentration_unit')

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        # row number used in error messages (zero-based count + 2)
        current_row = rows + 2
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id':None, 'salt_id':None}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                # two placeholders, two arguments (the previous three-argument
                # tuple made the formatting itself fail with TypeError)
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],current_row))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value

            if model_field in small_molecule_lookup:
                small_molecule_lookup[model_field]=value
                # look the molecule up once both identifying ids are known
                if None not in small_molecule_lookup.values():
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer['smallmolecule'] = sm
                    except Exception as e:
                        raise Exception(str(('sm facility id not found', small_molecule_lookup,e,'row',current_row)))
            elif model_field == 'short_name':
                try:
                    library = libraries[value]
                    initializer['library'] = library
                except Exception as e:
                    raise Exception(str(('library short_name not found', value,e,'row',current_row)))

        # advance the counter so current_row follows the sheet position
        rows += 1
def main(import_file,file_directory,deploy_dir):
    """
    Read in the qc events for batches
    - version 1 - for small molecule batches

    For every row of 'Sheet1' the referenced files (columns file1..file5)
    are copied from `file_directory` into the deploy directory, then a
    QCEvent is saved together with one QCAttachedFile per copied file.

    Args:
        import_file: workbook path, handed to iu.readtable.
        file_directory: directory the file columns are resolved against.
        deploy_dir: destination root for the copied files; when empty,
            settings.STATIC_AUTHENTICATED_FILE_DIR is used instead.

    Raises:
        Exception: when a required column has no value, a referenced file
            does not exist, the deploy directory does not exist, or a copy
            did not produce the destination file. Errors raised while
            saving records are logged and re-raised unchanged.
    """
    sheet_name = 'Sheet1'
    start_row = 0
    sheet = iu.readtable([import_file, sheet_name, start_row])  # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'facility_id': ('facility_id_for',True,None, lambda x: util.convertdata(x,int)),
        'salt_id': ('salt_id_for',False,None, lambda x: util.convertdata(x,int)),
        'batch_id':('batch_id_for',True,None, lambda x: util.convertdata(x,int)),
        'QC event date': ('date',True,None,util.date_converter),
        'outcome': ('outcome',True),
        'comment': 'comment',
        'is_restricted':('is_restricted',False,False,util.bool_converter),
        'file1': 'file1',
        'file2': 'file2',
        'file3': 'file3',
        'file4': 'file4',
        'file5': 'file5',
    }
    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        # store each row in a dict
        _dict = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            _dict[model_field] = value

        logger.debug(str(('dict: ', _dict)))

        files_to_attach = []
        for i in range(10):
            filenameProp = 'file%s'%i
            if not _dict.get(filenameProp, None):
                continue
            fileprop = _dict[filenameProp]
            filepath = os.path.join(file_directory,fileprop)
            if not os.path.exists(filepath):
                raise Exception(str(('file does not exist:',filepath,'row',
                                     rows+start_row)))
            filename = os.path.basename(filepath)
            # directory part of the sheet value, trailing separator kept.
            # rindex, not index: the file name may also occur inside the
            # directory part, and only the last occurrence is the file.
            relative_path = fileprop[:fileprop.rindex(filename)]

            # Move the file
            dest_dir = deploy_dir
            if not dest_dir:
                dest_dir = settings.STATIC_AUTHENTICATED_FILE_DIR
            if not os.path.isdir(dest_dir):
                raise Exception(str(('no such deploy directory, please create it', dest_dir)))
            if relative_path:
                dest_dir = os.path.join(dest_dir, relative_path)
                if not os.path.exists(dest_dir):
                    os.makedirs(dest_dir)
            deployed_path = os.path.join(dest_dir, filename)

            logger.debug(str(('deploy',filepath, deployed_path)))
            # replace any previously deployed copy
            if os.path.exists(deployed_path):
                os.remove(deployed_path)
            copy(filepath,deployed_path)
            if not os.path.isfile(deployed_path):
                raise Exception(str(('could not deploy to', deployed_path)))
            logger.debug(str(('successfully deployed to', deployed_path)))

            files_to_attach.append((filename,relative_path))

        initializer = None
        try:
            # create the qc record
            initializer = {key:_dict[key] for key in
                           ['facility_id_for','salt_id_for','batch_id_for','outcome','comment','date']}
            qc_event = QCEvent(**initializer)
            qc_event.save()
            logger.debug(str(('saved', qc_event)))

            # create attached file records
            for (filename,relative_path) in files_to_attach:
                initializer = {
                    'qc_event':qc_event,
                    'filename':filename,
                    'relative_path':relative_path,
                    'is_restricted':_dict['is_restricted'],
                }
                qc_attached_file = QCAttachedFile(**initializer)
                qc_attached_file.save()
                logger.debug(str(('created qc attached file', qc_attached_file)))

            rows += 1
        except Exception as e:
            logger.error(str(("Invalid initializer: ", initializer, 'row',
                              rows+start_row+2, e)))
            raise
def main(path):
    """
    Read in the Antibody

    Reads sheet 'Sheet1' of the workbook at `path` and saves one Antibody
    per row.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving an Antibody are logged and
            re-raised unchanged.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1])  # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'AR_Name': ('name',True),
        'AR_LINCS_ID': 'lincs_id',
        'AR_Alternative_Name': 'alternative_names',
        'AR_Center_ID': ('facility_id', True),
        'AR_Target_Protein': 'target_protein_name',
        'AR_Target_Protein_ID': 'target_protein_uniprot_id',
        'AR_Target_Gene': 'target_gene_name',
        'AR_Target_Gene_ID': 'target_gene_id',
        'AR_Target_Organism': 'target_organism',
        'AR_Immunogen': 'immunogen',
        'AR_Immunogen_Sequence': 'immunogen_sequence',
        'AR_AntibodyClonality': 'antibody_clonality',
        'AR_Source_Organism': 'source_organism',
        'AR_Antibody_Isotype': 'antibody_isotype',
        'AR_Engineering': 'engineering',
        'AR_Antibody_Purity': 'antibody_purity',
        'AR_Antibody_Labeling': 'antibody_labeling',
        'AR_Recommended_Experiment_Type': 'recommended_experiment_type',
        'AR_Relevant_Reference': 'relevant_reference',
        'AR_Specificity': 'specificity',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False),
    }

    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug(str(('read col: ', i, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            antibody = Antibody(**initializer)
            antibody.save()
            logger.info(str(('antibody created: ', antibody)))
            rows += 1
        except Exception:
            logger.error(str(( "Invalid antibody initializer: ", initializer)))
            raise
def main(path):
    """
    Read in the Antibody

    Reads sheet 'Sheet1' of the workbook at `path`, saves one Antibody per
    row (linked to its target Protein when `target_protein_lincs_id` is
    given and found) and creates an AntibodyBatch with batch_id 0 for each.

    A target protein id that matches no Protein is logged as an error and
    the antibody is still created without a target protein.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving the records are logged and
            re-raised unchanged.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 0])

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'AR_Name': ('name',True),
        'AR_LINCS_ID': 'lincs_id',
        'AR_Alternative_Name': 'alternative_names',
        # keep only the text that follows the 'HMSL' prefix
        'AR_Center_Specific_ID': ('facility_id',True,None,
                                  lambda x: x[x.index('HMSL')+4:]),
        'AR_Clone_Name': 'clone_name',
        'AR_RRID': 'rrid',
        'AR_Antibody_Type': 'type',
        'target_protein_lincs_id': (
            'target_protein_lincs_id',False,None,
            lambda x: x[x.index('HMSL')+4:] if x else None ),
        'AR_Non-Protein_Target': 'non_protein_target_name',
        'AR_Target_Organism': 'target_organism',
        'AR_Immunogen': 'immunogen',
        'AR_Immunogen_Sequence': 'immunogen_sequence',
        'AR_Antibody_Species': 'species',
        'AR_Antibody_Clonality': 'clonality',
        'AR_Antibody_Isotype': 'isotype',
        'AR_Antibody_Production_Source_Organism': 'source_organism',
        'AR_Antibody_Production_Details': 'production_details',
        'AR_Antibody_Labeling': 'labeling',
        'AR_Antibody_Labeling_Details': 'labeling_details',
        'AR_Relevant_Citations': 'relevant_citations',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter),
    }

    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug('cols: %s' % cols)
    for row in sheet:
        logger.debug('row %s - %s' %(rows,row))
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug('read col: %d: %s' % (i,col_def))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug('raw value %r' % value)
            # the literal string 'None' is treated like a missing value
            if value is None or value == 'None':
                value = None
                if default is not None:
                    value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            # converters only run on truthy values
            if value and converter is not None:
                value = converter(value)
            logger.debug('model_field: %s, converted value %r'
                         % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('row: %s, initializer: %s' % (rows,initializer))

            # not an Antibody constructor argument: resolve it to a Protein
            target_protein_lincs_id = initializer.pop('target_protein_lincs_id',None)
            if target_protein_lincs_id:
                try:
                    target_protein = Protein.objects.get(lincs_id=target_protein_lincs_id)
                    initializer['target_protein'] = target_protein
                except ObjectDoesNotExist:
                    # report the row counter (the column index was logged before)
                    logger.error('target_protein_lincs_id "%s" does not exist, row: %d'
                                 % (target_protein_lincs_id,rows))
            antibody = Antibody(**initializer)
            antibody.save()
            logger.info('antibody created: %s' % antibody)
            rows += 1

            # create a default batch - 0
            AntibodyBatch.objects.create(reagent=antibody,batch_id=0)

        except Exception:
            logger.error("Invalid antibody initializer: %s" % initializer)
            raise
def main(path):
    """
    Read in the Data Working Group sheets

    Deletes every existing FieldInformation record, then creates one per
    row of the CSV file at `path`. Rows without a `field` value are skipped
    with a warning. When a row names a table, the table and field are
    checked against the model registered for APPNAME before saving.

    Raises:
        Exception: when a required column has no value, or when the named
            table or field is unknown. Errors raised while building or
            saving a record are logged and re-raised unchanged.
    """
    logger.info(str(('read field information file', path)))

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'table': 'table',
        'field': 'field',
        'alias': 'alias',
        'queryset': 'queryset',
        'show in detail': ('show_in_detail', True, False, util.bool_converter),
        'show in list': ('show_in_list', True, False, util.bool_converter),
        'show_as_extra_field': ('show_as_extra_field', False, False,
                                util.bool_converter),
        'is_lincs_field': ('is_lincs_field', True, False, util.bool_converter),
        'is_unrestricted': ('is_unrestricted', False, False,
                            util.bool_converter),
        'list_order': ('list_order', True, None,
                       lambda x: util.convertdata(x, int)),
        'detail_order': ('detail_order', True, None,
                         lambda x: util.convertdata(x, int)),
        'use_for_search_index': ('use_for_search_index', True, False,
                                 util.bool_converter),
        'Data Working Group version': 'dwg_version',
        'Unique ID': ('unique_id', True),
        'DWG Field Name': 'dwg_field_name',
        'HMS Field Name': 'hms_field_name',
        'Related to': 'related_to',
        'Description': 'description',
        'Importance (1: essential; 2: desirable / recommended; 3: optional)':
            'importance',
        'Comments': 'comments',
        # NOTE(review): the next two columns share one model field, so the
        # column read last overwrites the other - confirm this is intended.
        'Ontologies / references considered': 'ontology_reference',
        'Link to ontology / reference': 'ontology_reference',
        'Additional Notes (for development)': 'additional_notes',
    }
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    with open(path) as f:
        reader = csv.reader(f)
        # first CSV row holds the column labels; next() works on Python 2 and 3
        labels = next(reader)
        cols = util.find_columns(column_definitions, labels,
                                 all_sheet_columns_required=False)

        logger.info('delete current table')
        FieldInformation.objects.all().delete()

        for j, row in enumerate(reader):
            logger.debug('row %d: %s', j, row)
            initializer = {}
            for i, value in enumerate(row):
                if i not in cols:
                    logger.info(str(('column out of range', j + 1, i)))
                    continue
                col_def = cols[i]

                logger.debug(str(('read col: ', i, ', ', col_def)))
                required = col_def['required']
                default = col_def['default']
                converter = col_def['converter']
                model_field = col_def['model_field']

                # Todo, refactor to a method
                logger.debug(str(('raw value', value)))
                if converter:
                    logger.debug(str(('using converter', converter, value)))
                    value = converter(value)
                    logger.debug(str(('converted', value)))
                # Note: must check the value against None, as False is a
                # valid value
                if value is None and default is not None:
                    value = default
                if value is None and required is True:
                    raise Exception('Field is required: %s, record: %d'
                                    % (col_def['column_label'], j + 1))
                logger.debug(
                    str(('model_field: ', model_field, ', value: ', value)))
                initializer[model_field] = value

            try:
                logger.debug(str(('initializer: ', initializer)))
                if not initializer['field']:
                    logger.warning(
                        str((
                            'Note: table entry has no field definition (will be skipped)',
                            initializer, 'current row:', j + 1)))
                    continue
                lfi = FieldInformation(**initializer)
                # check if the table/field exists
                if lfi.table:
                    table = models.get_model(APPNAME, lfi.table)
                    if not table:
                        raise Exception(str(('unknown table', lfi.table)))
                    field_names = [x.name for x in table._meta.fields]
                    if lfi.field not in field_names:
                        raise Exception(str(
                            ('unknown field: ', lfi.field)))
                lfi.save()
                logger.info(str(('fieldInformation created:', lfi)))
            except Exception as e:
                logger.error(
                    str(("Invalid fieldInformation, initializer so far: ",
                         initializer, 'current row:', j + 1, e)))
                # bare raise keeps the original traceback
                raise
def main(path):
    """
    Read in the Antibody Batches

    Reads sheet 'Sheet1' of the workbook at `path` and saves one
    AntibodyBatch per row, attached to the Antibody whose facility_id
    matches the row's AR_Center_Specific_ID.

    An id that matches no Antibody is logged as an error; the batch is then
    still built without a reagent.

    Raises:
        Exception: when a required column has no value for a row. Errors
            raised while building or saving an AntibodyBatch are logged
            and re-raised unchanged.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field','required','default','converter')
    column_definitions = {
        # keep only the text that follows the 'HMSL' prefix
        'AR_Center_Specific_ID': ('antibody_facility_id',True,None,
                                  lambda x: x[x.index('HMSL')+4:]),
        'AR_Center_Batch_ID': ('batch_id',True,None,lambda x:util.convertdata(x,int)),
        'AR_Center_Name': 'center_name',
        'AR_Provider_Name': 'provider_name',
        'AR_Provider_Catalog_ ID': 'provider_catalog_id',
        'AR_Provider_Batch_ID': 'provider_batch_id',
        'AR_Antibody_Purity': 'antibody_purity',
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
    }

    # convert the labels to fleshed out dict's, with strategies for optional,
    # default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug('cols: %s' % cols)
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug('read col: %d: %s' % (i,col_def))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug('raw value %r' % value)
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a legitimate value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            logger.debug('model_field: %s, converted value %r'
                         % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('initializer: %s' % initializer)

            # not an AntibodyBatch constructor argument: resolve it to the Antibody
            antibody_facility_id = initializer.pop('antibody_facility_id',None)
            if antibody_facility_id:
                try:
                    antibody = Antibody.objects.get(facility_id=antibody_facility_id)
                    initializer['reagent'] = antibody
                except ObjectDoesNotExist:
                    # report the row counter (the column index was logged before)
                    logger.error('AR_Center_Specific_ID: "%s" does not exist, row: %d'
                                 % (antibody_facility_id,rows))
            antibody_batch = AntibodyBatch(**initializer)
            antibody_batch.save()
            logger.info('antibody batch created: %s' % antibody_batch)
            rows += 1
        except Exception:
            logger.error("Invalid antibody_batch initializer: %s" % initializer)
            raise
def main(path):
    """
    Read in the Antibody records and their target proteins.

    Reads sheet 'Sheet1' of the workbook at `path`. For each row an Antibody
    is created, the ';'-separated center ids in `target_protein_center_ids`
    and `other_human_target_protein_center_ids` are resolved to Protein
    records and assigned to the antibody, and an AntibodyBatch with
    batch_id 0 is created.

    Raises:
        Exception: when a required column has no value for a row.
        ObjectDoesNotExist: when a listed protein center id matches no
            Protein. Any error raised while creating the records is logged
            and re-raised unchanged.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'AR_Name': ('name',True),
        'AR_LINCS_ID': 'lincs_id',
        'AR_Alternative_Name': 'alternative_names',
        'AR_Alternative_ID': 'alternative_id',
        # keep only the text that follows the 'HMSL' prefix
        'AR_Center_Canonical_ID': (
            'facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
        'AR_Clone_Name': 'clone_name',
        'AR_RRID': 'rrid',
        'AR_Antibody_Type': 'type',
        'target_protein_center_ids': 'target_protein_center_ids',
        'AR_Non-Protein_Target': 'non_protein_target_name',
        'AR_Target_Organism': 'target_organism',
        'other_target_information': 'other_target_information',
        'other_human_target_protein_center_ids':
            'other_human_target_protein_center_ids',
        'AR_Immunogen': 'immunogen',
        'AR_Immunogen_Sequence': 'immunogen_sequence',
        'AR_Antibody_Species': 'species',
        'AR_Antibody_Clonality': 'clonality',
        'AR_Antibody_Isotype': 'isotype',
        'AR_Antibody_Production_Source_Organism': 'source_organism',
        'AR_Antibody_Production_Details': 'production_details',
        'AR_Antibody_Labeling': 'labeling',
        'AR_Antibody_Labeling_Details': 'labeling_details',
        'AR_Relevant_Citations': 'relevant_citations',
        'Date Data Received':(
            'date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter),
    }

    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels,
                             all_sheet_columns_required=False)

    rows = 0
    logger.debug('cols: %s' % cols)
    for row in sheet:
        logger.debug('row %s - %s' %(rows,row))
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            col_def = cols[i]

            logger.debug('read col: %d: %s' % (i,col_def))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug('raw value %r' % value)
            # the literal string 'None' is treated like a missing value
            if value is None or value == 'None':
                value = None
                if default is not None:
                    value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'],rows))
            # converters only run on truthy values
            if value and converter is not None:
                value = converter(value)
            logger.debug('model_field: %s, converted value %r'
                         % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('row: %s, initializer: %s' % (rows,initializer))

            # not Antibody constructor arguments: resolved to Proteins below
            target_protein_center_ids = initializer.pop(
                'target_protein_center_ids',None)
            other_human_target_protein_center_ids = initializer.pop(
                'other_human_target_protein_center_ids',None)

            antibody = Antibody.objects.create(**initializer)

            if target_protein_center_ids:
                antibody.target_proteins = _proteins_for_center_ids(
                    target_protein_center_ids,
                    'target_protein_center_ids', rows)
            if other_human_target_protein_center_ids:
                antibody.other_human_target_proteins = _proteins_for_center_ids(
                    other_human_target_protein_center_ids,
                    'other_human_target_protein_center_ids', rows)
            antibody.save()
            logger.info('antibody created: %s' % antibody)
            rows += 1

            # create a default batch - 0
            AntibodyBatch.objects.create(reagent=antibody,batch_id=0)

        except Exception:
            logger.error("Invalid antibody initializer: %s" % initializer)
            raise


def _proteins_for_center_ids(center_ids, column_label, row_number):
    """
    Resolve a ';'-separated list of 'HMSL'-prefixed ids to Protein records.

    Logs the offending id with `column_label` and `row_number`, then
    re-raises, when an id matches no Protein.
    """
    proteins = []
    for center_id in center_ids.split(';'):
        # keep only the text that follows the 'HMSL' prefix
        facility_id = center_id[center_id.index('HMSL')+4:]
        try:
            proteins.append(Protein.objects.get(facility_id=facility_id))
        except ObjectDoesNotExist:
            logger.error('%s "%s" does not exist, row: %d'
                         % (column_label, facility_id, row_number))
            raise
    return proteins
def main(path):
    """
    Read in the smallmolecule batch info

    Every row of 'sheet 1' becomes one SmallMoleculeBatch. The 'facility_id'
    and 'salt_id' columns are not stored on the batch; once both are known
    they are used to look up the SmallMolecule stored as 'reagent'.

    Raises Exception when a required value is missing; lookup and save
    errors are logged and re-raised.
    """
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])

    # NOTE: these db fields are not integers, but reading them as INT keeps
    # the spreadsheet values from being interpreted as floats
    as_int = lambda x: util.convertdata(x, int)

    property_names = ('model_field', 'required', 'default', 'converter')
    column_definitions = util.fill_in_column_definitions(property_names, {
        'facility_id': ('facility_id', True, None, as_int),
        'salt_id': ('salt_id', True, None, as_int),
        'facility_batch_id': ('batch_id', True, None, as_int),
        'provider': ('provider_name', True),
        'provider_catalog_id': 'provider_catalog_id',
        'provider_sample_id': 'provider_batch_id',
        'chemical_synthesis_reference': 'chemical_synthesis_reference',
        'purity': 'purity',
        'purity_method': 'purity_method',
        'aqueous_solubility': 'aqueous_solubility',
        # FIXME: should warn the user if no unit is provided when
        # aqueous_solubility is provided
        'aqueous_solubility_unit': 'aqueous_solubility_unit',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
    })

    # column ordinal -> column definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
                             all_sheet_columns_required=False)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id': None, 'salt_id': None}

        for index, value in enumerate(cells):
            if index not in cols:
                continue
            column = cols[index]
            logger.debug(str(('read col: ', index, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))

            if model_field not in small_molecule_lookup:
                initializer[model_field] = value
                continue

            # identifier column: resolve the reagent once both ids are known
            small_molecule_lookup[model_field] = value
            if None in small_molecule_lookup.values():
                continue
            try:
                sm = SmallMolecule.objects.get(**small_molecule_lookup)
                initializer['reagent'] = sm
            except Exception:
                logger.error(
                    str(('sm identifiers not found', small_molecule_lookup,
                         'row', rows + start_row + 2)))
                raise

        try:
            logger.debug(str(('initializer: ', initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(('smb created:', smb)))
            rows += 1
        except Exception as e:
            logger.error(
                str(("Invalid smallmolecule batch initializer: ",
                     initializer, 'row', rows + start_row + 2, e)))
            raise
def main(import_file, file_directory, deploy_dir):
    """
    Read in the qc events for batches
    - version 1 - for small molecule batches

    For each row of "Sheet1": convert the mapped columns, copy every file
    named in a file column from `file_directory` into the deploy directory,
    then save a QCEvent and one QCAttachedFile per deployed file.

    import_file    -- workbook to read
    file_directory -- directory the file column values are joined to
    deploy_dir     -- destination directory; when empty,
                      settings.STATIC_AUTHENTICATED_FILE_DIR is used

    Raises Exception for a missing required value, a missing source file, a
    missing deploy directory or a failed copy. Errors raised while saving
    are logged and re-raised.
    """
    sheet_name = "Sheet1"
    start_row = 0
    sheet = iu.readtable([import_file, sheet_name, start_row])

    as_int = lambda x: util.convertdata(x, int)
    property_names = ("model_field", "required", "default", "converter")
    column_definitions = util.fill_in_column_definitions(property_names, {
        "facility_id": ("facility_id_for", True, None, as_int),
        "salt_id": ("salt_id_for", False, None, as_int),
        "batch_id": ("batch_id_for", True, None, as_int),
        "QC event date": ("date", True, None, util.date_converter),
        "outcome": ("outcome", True),
        "comment": "comment",
        "is_restricted": ("is_restricted", False, False, util.bool_converter),
        "file1": "file1",
        "file2": "file2",
        "file3": "file3",
        "file4": "file4",
        "file5": "file5",
    })

    # column ordinal -> column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    event_fields = ["facility_id_for", "salt_id_for", "batch_id_for",
                    "outcome", "comment", "date"]

    rows = 0
    logger.debug(str(("cols: ", cols)))
    for row in sheet:
        cells = util.make_row(row)

        # converted values of this row, keyed by model field
        _dict = {}
        for index, value in enumerate(cells):
            if index not in cols:
                continue
            column = cols[index]
            logger.debug(str(("read col: ", index, ", ", column)))
            required = column["required"]
            default = column["default"]
            converter = column["converter"]
            model_field = column["model_field"]

            logger.debug(str(("raw value", value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception("Field is required: %s, record: %d"
                                % (column["column_label"], rows))
            logger.debug(
                str(("model_field: ", model_field, ", value: ", value)))
            _dict[model_field] = value

        logger.debug(str(("dict: ", _dict)))

        files_to_attach = []
        for number in range(10):
            fileprop = _dict.get("file%s" % number, None)
            if not fileprop:
                continue

            filepath = os.path.join(file_directory, fileprop)
            if not os.path.exists(filepath):
                raise Exception(str(("file does not exist:", filepath,
                                     "row", rows + start_row)))
            filename = os.path.basename(filepath)
            # whatever precedes the file name in the cell is kept as the
            # sub directory below the deploy directory
            relative_path = fileprop[: fileprop.index(filename)]

            # Move the file
            dest_dir = deploy_dir or settings.STATIC_AUTHENTICATED_FILE_DIR
            if not os.path.isdir(dest_dir):
                raise Exception(str((
                    "no such deploy directory, please create it", dest_dir)))
            if relative_path:
                dest_dir = os.path.join(dest_dir, relative_path)
                if not os.path.exists(dest_dir):
                    os.makedirs(dest_dir)

            deployed_path = os.path.join(dest_dir, filename)
            logger.debug(str(("deploy", filepath, deployed_path)))
            # an earlier deployment of the same file is replaced
            if os.path.exists(deployed_path):
                os.remove(deployed_path)
            copy(filepath, deployed_path)
            if os.path.isfile(deployed_path):
                logger.debug(str(("successfully deployed to", deployed_path)))
            else:
                raise Exception(str(("could not deploy to", deployed_path)))

            files_to_attach.append((filename, relative_path))

        # kept outside the try so the handler can always report it
        initializer = None
        try:
            # create the qc record
            initializer = dict((key, _dict[key]) for key in event_fields)
            qc_event = QCEvent(**initializer)
            qc_event.save()
            logger.debug(str(("saved", qc_event)))

            # create attached file records
            for attached_name, attached_dir in files_to_attach:
                initializer = {
                    "qc_event": qc_event,
                    "filename": attached_name,
                    "relative_path": attached_dir,
                    "is_restricted": _dict["is_restricted"],
                }
                qc_attached_file = QCAttachedFile(**initializer)
                qc_attached_file.save()
                logger.debug(
                    str(("created qc attached file", qc_attached_file)))

            rows += 1
        except Exception as e:
            logger.error(str(("Invalid initializer: ", initializer,
                              "row", rows + start_row + 2, e)))
            raise
def main(path):
    """
    Import small molecule batches from 'sheet 1' of the workbook at `path`.

    One SmallMoleculeBatch is saved per row. 'facility_id' and 'salt_id' are
    used only to look up the SmallMolecule stored under 'reagent'; all other
    mapped columns are passed to the SmallMoleculeBatch constructor.

    Raises Exception when a required value is missing; lookup and save
    errors are logged (with traceback) and re-raised.
    """
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])

    as_int = lambda x: util.convertdata(x, int)
    as_float = lambda x: util.convertdata(x, float)

    property_names = ('model_field', 'required', 'default', 'converter')
    column_definitions = util.fill_in_column_definitions(property_names, {
        'facility_id': ('facility_id', True, None, as_int),
        'salt_id': ('salt_id', True, None, as_int),
        'facility_batch_id': ('batch_id', True, None, as_int),
        'provider': ('provider_name', False),
        'provider_catalog_id': 'provider_catalog_id',
        'provider_sample_id': 'provider_batch_id',
        'molecular_weight': ('_molecular_weight', False, None, as_float),
        'molecular_formula': '_molecular_formula',
        'chemical_synthesis_reference': '_chemical_synthesis_reference',
        'purity': '_purity',
        'purity_method': '_purity_method',
        'aqueous_solubility': 'aqueous_solubility',
        # FIXME: should warn the user if no unit is provided when
        # aqueous_solubility is provided
        'aqueous_solubility_unit': 'aqueous_solubility_unit',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
    })

    # column ordinal -> column definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
                             all_sheet_columns_required=False)

    rows = 0
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id': None, 'salt_id': None}

        for index, value in enumerate(cells):
            if index not in cols:
                continue
            column = cols[index]
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))

            if model_field not in small_molecule_lookup:
                initializer[model_field] = value
                continue

            # identifier column: resolve the reagent once both ids are known
            small_molecule_lookup[model_field] = value
            if None in small_molecule_lookup.values():
                continue
            try:
                sm = SmallMolecule.objects.get(**small_molecule_lookup)
                initializer['reagent'] = sm
            except Exception:
                logger.exception(
                    'sm identifiers not found: %r, row: %d',
                    small_molecule_lookup, rows + start_row + 2)
                raise

        try:
            logger.debug(str(('initializer: ', initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(('smb created:', smb)))
            rows += 1
        except Exception:
            logger.exception(
                'Invalid smallmolecule batch initializer: %r, row: %d',
                initializer, rows + start_row + 2)
            raise
def main(path):
    """
    Read in the Antibody

    Reads 'Sheet1' of the workbook at `path`, creates one Antibody per row
    and a default AntibodyBatch (batch_id 0) for it. When the
    'target_protein_lincs_id' column has a value, the Protein with that
    lincs_id is looked up and passed as 'target_protein'.

    Raises:
        Exception: when a required column has no value.
        Any error raised while creating the records is logged together with
        the row initializer and re-raised. A target protein that cannot be
        found is only logged; the antibody is still created without it.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 0])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'AR_Name': ('name', True),
        'AR_LINCS_ID': 'lincs_id',
        'AR_Alternative_Name': 'alternative_names',
        # keep only the part of the id that follows the 'HMSL' prefix
        'AR_Center_Specific_ID': (
            'facility_id', True, None, lambda x: x[x.index('HMSL') + 4:]),
        'AR_Clone_Name': 'clone_name',
        'AR_RRID': 'rrid',
        'AR_Antibody_Type': 'type',
        'target_protein_lincs_id': (
            'target_protein_lincs_id', False, None,
            lambda x: x[x.index('HMSL') + 4:] if x else None),
        'AR_Non-Protein_Target': 'non_protein_target_name',
        'AR_Target_Organism': 'target_organism',
        'AR_Immunogen': 'immunogen',
        'AR_Immunogen_Sequence': 'immunogen_sequence',
        'AR_Antibody_Species': 'species',
        'AR_Antibody_Clonality': 'clonality',
        'AR_Antibody_Isotype': 'isotype',
        'AR_Antibody_Production_Source_Organism': 'source_organism',
        'AR_Antibody_Production_Details': 'production_details',
        'AR_Antibody_Labeling': 'labeling',
        'AR_Antibody_Labeling_Details': 'labeling_details',
        'AR_Relevant_Citations': 'relevant_citations',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
        'Is Restricted': (
            'is_restricted', False, False, util.bool_converter),
    }

    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)
    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug('cols: %s' % cols)
    for row in sheet:
        logger.debug('row %s - %s' % (rows, row))
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]
            logger.debug('read col: %d: %s' % (i, column))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug('raw value %r' % value)
            # the literal string 'None' is treated as an empty cell
            if value is None or value == 'None':
                value = None
                if default is not None:
                    value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))
            # converters only see truthy values
            if value and converter is not None:
                value = converter(value)

            logger.debug('model_field: %s, converted value %r'
                         % (model_field, value))
            initializer[model_field] = value

        try:
            logger.debug('row: %s, initializer: %s' % (rows, initializer))

            # the lincs id is a lookup key, not an Antibody constructor arg
            target_protein_lincs_id = initializer.pop(
                'target_protein_lincs_id', None)
            if target_protein_lincs_id:
                try:
                    initializer['target_protein'] = Protein.objects.get(
                        lincs_id=target_protein_lincs_id)
                except ObjectDoesNotExist:
                    # not fatal: logged with the record counter (the column
                    # index was reported here before)
                    logger.error(
                        'target_protein_lincs_id "%s" does not exist, '
                        'row: %d' % (target_protein_lincs_id, rows))

            antibody = Antibody(**initializer)
            antibody.save()
            logger.info('antibody created: %s' % antibody)
            rows += 1

            # create a default batch - 0
            AntibodyBatch.objects.create(reagent=antibody, batch_id=0)
        except Exception:
            logger.error("Invalid antibody initializer: %s" % initializer)
            raise
def main(path):
    """
    Read in the Data Working Group sheets

    Opens the workbook at `path` with xlrd, reads the first sheet, deletes
    every existing FieldInformation record and creates one FieldInformation
    per data row. Rows whose 'field' value is None are skipped with a
    warning. When a row names a table, the table and field are checked
    against the models of APPNAME before saving.

    Raises:
        Exception: when a required value is missing, or when the named
            table or field is unknown. Errors raised while building or
            saving a record are logged and re-raised with their original
            traceback.
    """
    logger.info("start")
    book = xlrd.open_workbook(path)
    worksheet = book.sheet_by_index(0)  # first sheet of the workbook

    properties = ('model_field', 'required', 'default', 'converter')
    # NOTE(review): two sheet columns below map to 'ontology_reference';
    # when both are present the later column overwrites the earlier one.
    column_definitions = {
        'table': 'table',
        'field': 'field',
        'alias': 'alias',
        'queryset': 'queryset',
        'show in detail': (
            'show_in_detail', True, False, util.bool_converter),
        'show in list': ('show_in_list', True, False, util.bool_converter),
        'show_as_extra_field': (
            'show_as_extra_field', False, False, util.bool_converter),
        'is_lincs_field': (
            'is_lincs_field', True, False, util.bool_converter),
        'is_unrestricted': (
            'is_unrestricted', False, False, util.bool_converter),
        'order': ('order', True, None, lambda x: util.convertdata(x, int)),
        'use_for_search_index': (
            'use_for_search_index', True, False, util.bool_converter),
        'Data Working Group version': 'dwg_version',
        'Unique ID': ('unique_id', True),
        'DWG Field Name': 'dwg_field_name',
        'HMS Field Name': 'hms_field_name',
        'Related to': 'related_to',
        'Description': 'description',
        'Importance (1: essential; 2: desirable / recommended; 3: optional)':
            'importance',
        'Comments': 'comments',
        'Ontologies / references considered': 'ontology_reference',
        'Link to ontology / reference': 'ontology_reference',
        'Additional Notes (for development)': 'additional_notes',
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    column_count = worksheet.ncols
    header_row = 0  # note zero indexed
    labels = [str(worksheet.cell_value(header_row, col))
              for col in range(column_count)]

    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, labels,
                             all_sheet_columns_required=False)

    # the table is rebuilt from scratch on every import
    logger.info('delete current table')
    FieldInformation.objects.all().delete()

    rows = 0
    for curr_row in range(1, worksheet.nrows):
        actual_row = curr_row + 1  # 1-based row number for messages
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(str(('row', worksheet.row(curr_row))))

        initializer = {}
        for i in range(column_count):
            if i not in cols:
                continue
            value = unicode(worksheet.cell_value(curr_row, i))
            column = cols[i]

            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                logger.debug(str(('using converter', converter, value)))
                value = converter(value)
                logger.debug(str(('converted', value)))
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], actual_row))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            if initializer['field'] is None:
                logger.warning(str((
                    'Note: table entry has no field definition '
                    '(will be skipped)',
                    initializer, 'current row:', actual_row)))
                continue

            lfi = FieldInformation(**initializer)
            # check if the table/field exists
            if lfi.table is not None:
                table = models.get_model(APPNAME, lfi.table)
                if table is None:
                    raise Exception(str(('unknown table', lfi.table)))
                field_names = [f.name for f in table._meta.fields]
                if lfi.field not in field_names:
                    raise Exception(str(('unknown field: ', lfi.field)))

            lfi.save()
            logger.info(str(('fieldInformation created:', lfi)))
            rows += 1
        except Exception as e:
            logger.error(str((
                "Invalid fieldInformation, initializer so far: ",
                initializer, 'current row:', actual_row, e)))
            # bare raise keeps the original traceback
            raise
def readLibraries(path, sheetName):
    """
    Read the library sheet and create one Library per row.

    path      -- workbook to read
    sheetName -- name of the sheet holding the library definitions

    Returns a dict mapping each created Library's short_name to the Library.

    Raises Exception when a required value is missing; errors raised while
    creating a Library are logged with the row initializer and re-raised.
    """
    # Note, skipping the header row by default
    sheet = iu.readtable([path, sheetName])

    # dict to map spreadsheet fields to the Library fields
    properties = ('model_field', 'required', 'default', 'converter')
    date_parser = lambda x: util.convertdata(x, date)
    column_definitions = {
        'Name': ('name', True),  # TODO use the model to determine if req'd
        'ShortName': ('short_name', True),
        'Library Type': 'type',
        'Date First Plated': ('date_first_plated', False, None, date_parser),
        'Date Data Received': (
            'date_data_received', False, None, date_parser),
        'Date Loaded': ('date_loaded', False, None, date_parser),
        'Date Publicly Available': (
            'date_publicly_available', False, None, date_parser),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False),
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    libraries = {}
    for row in sheet:
        logger.debug(str(('row raw: ', row)))
        r = util.make_row(row)
        logger.debug(str(('row: ', r)))
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            library = Library(**initializer)
            library.save()
            logger.info(str(('library created', library)))
            libraries[library.short_name] = library
            rows += 1
        except Exception:
            logger.error(str(('library initializer problem: ', initializer)))
            # bare raise keeps the original traceback
            raise

    # callers look libraries up by short_name in the returned mapping
    return libraries
def main(path, do_precursors_only):
    """
    Read in the Cell

    Reads the 'HMS-LINCS cell line metadata' sheet of the workbook at
    `path`. For every row the 'Precursor_Cell' value, when present, must
    match HMSL<5 followed by four digits>-<digits>; the two captured parts
    are recorded per facility_id. Unless `do_precursors_only` is true, a
    Cell and its default CellBatch (batch_id 0) are created for the row.

    Raises:
        Exception: when a required value is missing or a precursor id does
            not match the expected pattern. Converter and save errors are
            logged and re-raised with their original traceback.
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    # allow for informational header row
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        # keep only the part of the id that follows the 'HMSL' prefix
        'Facility ID': (
            'facility_id', True, None, lambda x: x[x.index('HMSL')+4:]),
        'CL_Name': ('name', True),
        'CL_LINCS_ID': 'lincs_id',
        'CL_Alternate_Name': 'alternative_names',
        'CL_Alternate_ID': 'alternative_id',
        'Precursor_Cell': 'precursor_facility_batch_id',
        'CL_Organism': 'organism',
        'CL_Organ': 'organ',
        'CL_Tissue': 'tissue',
        'CL_Cell_Type': 'cell_type',
        'CL_Cell_Type_Detail': 'cell_type_detail',
        'CL_Donor_Sex': 'donor_sex',
        'CL_Donor_Age': (
            'donor_age_years', False, None,
            lambda x: util.convertdata(x, int)),
        'CL_Donor_Ethnicity': 'donor_ethnicity',
        'CL_Donor_Health_Status': 'donor_health_status',
        'CL_Disease': 'disease',
        'CL_Disease_Detail': 'disease_detail',
        'CL_Production_Details': 'production_details',
        'CL_Genetic_Modification': 'genetic_modification',
        'CL_Known_Mutations': 'mutations_known',
        'CL_Mutation_Citations': 'mutation_citations',
        'CL_Verification_Reference_Profile':
            'verification_reference_profile',
        'CL_Growth_Properties': 'growth_properties',
        'CL_Recommended_Culture_Conditions':
            'recommended_culture_conditions',
        'CL_Relevant_Citations': 'relevant_citations',
        'Usage Note': 'usage_note',
        'CL_Reference_Source': 'reference_source',
        'Reference Source URL': 'reference_source_url',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
        'Is Restricted': (
            'is_restricted', False, False, util.bool_converter),
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
                             all_sheet_columns_required=False)

    rows = 0
    # NOTE(review): precursor_map is filled below but not read anywhere in
    # the visible body - confirm where it is meant to be consumed.
    precursor_map = {}
    precursor_pattern = re.compile(r'HMSL(5\d{4})-(\d+)')

    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            value = convertdata(value)
            if value is not None and converter:
                try:
                    value = converter(value)
                except Exception:
                    logger.error(
                        'field parse error: %r, value: %r, row: %d',
                        column['column_label'], value, rows+2)
                    raise
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))
            logger.debug('model_field: %r, value: %r', model_field, value)
            initializer[model_field] = value

        # the precursor column is optional, so it may be absent entirely
        precursor_facility_batch_id = initializer.pop(
            'precursor_facility_batch_id', None)
        if precursor_facility_batch_id:
            match = precursor_pattern.match(precursor_facility_batch_id)
            if not match:
                # report the expected pattern text and the offending value
                raise Exception(
                    'Invalid precursor pattern: needs: %s: %r, row: %d'
                    % (precursor_pattern.pattern,
                       precursor_facility_batch_id, rows))
            precursor_map[initializer['facility_id']] = (
                match.group(1), match.group(2))

        if not do_precursors_only:
            try:
                logger.info('initializer: %r', initializer)
                cell = Cell(**initializer)
                cell.save()
                logger.info(str(('cell created:', cell)))

                # create a default batch - 0
                CellBatch.objects.create(reagent=cell, batch_id=0)
            except Exception:
                logger.error('Invalid Cell, name: %r, row: %d',
                             initializer.get('name'), rows)
                # bare raise keeps the original traceback
                raise

        rows += 1
def main(path):
    """
    Read in the Library and LibraryMapping sheets

    Creates the libraries through readLibraries(path, 'Library'), then walks
    the 'LibraryMapping' sheet. For every row the mapped columns are
    converted; once both 'Facility' and 'Salt' are known the SmallMolecule
    is looked up and stored as 'reagent', and the 'Library Name' value is
    resolved against the libraries read from the first sheet.

    Raises Exception when a required value is missing, when the small
    molecule cannot be found, or when the library short name is unknown.

    NOTE(review): the visible body only assembles the per-row initializer;
    nothing here saves a mapping record, and the two *_lookup tuples are
    not read. Confirm whether the remainder of this function is missing.
    """
    libraries = readLibraries(path, 'Library')

    sheet = iu.readtable([path, 'LibraryMapping'])
    properties = ('model_field', 'required', 'default', 'converter')
    as_int = lambda x: util.convertdata(x, int)
    column_definitions = {
        'Facility': ('facility_id', False, None, as_int),
        'Salt': ('salt_id', False, None, as_int),
        'Batch': ('batch_id', False, None, as_int),
        'Is Control': ('is_control', False, False, util.bool_converter),
        'Plate': ('plate', False, None, as_int),
        'Well': 'well',
        'Library Name': 'short_name',
        'Concentration': 'concentration',
        'Concentration Unit': 'concentration_unit',
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    small_molecule_batch_lookup = ('reagent', 'batch_id')
    library_mapping_lookup = ('smallmolecule_batch', 'library', 'is_control',
                              'plate', 'well', 'concentration',
                              'concentration_unit')
    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        current_row = rows + 2
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id': None, 'salt_id': None}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                # two specifiers, two arguments (the original passed three
                # and failed with a TypeError instead of this message)
                raise Exception('Field is required: %s, row: %d'
                                % (column['column_label'], current_row))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value

            if model_field in small_molecule_lookup:
                small_molecule_lookup[model_field] = value
                # resolve the reagent once both identifiers are known
                if None not in small_molecule_lookup.values():
                    try:
                        sm = SmallMolecule.objects.get(
                            **small_molecule_lookup)
                        initializer['reagent'] = sm
                    except Exception as e:
                        raise Exception(str((
                            'sm facility id not found',
                            small_molecule_lookup, e,
                            'row', current_row)))
            elif model_field == 'short_name':
                try:
                    library = libraries[value]
                    initializer['library'] = library
                except Exception as e:
                    raise Exception(str((
                        'library short_name not found', value, e,
                        'row', current_row)))

        # advance the counter so current_row tracks the sheet row
        rows += 1
def main(path):
    """
    Read in the Cell

    Reads the 'HMS-LINCS cell line metadata' sheet of the workbook at
    `path` and creates, for every row, a Cell and its default CellBatch
    (batch_id 0).

    Raises:
        Exception: when a required value is missing. Errors raised while
            creating the records are logged with the row initializer and
            re-raised with their original traceback.
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field', 'required', 'default', 'converter')
    as_int = lambda x: util.convertdata(x, int)
    column_definitions = {
        # keep only the part of the id that follows the 'HMSL' prefix
        'Facility ID': (
            'facility_id', True, None, lambda x: x[x.index('HMSL')+4:]),
        'CL_Name': ('name', True),
        'CL_LINCS_ID': 'lincs_id',
        'CL_Alternate_Name': 'alternative_names',
        'CL_Alternate_ID': 'alternate_id',
        'CL_Center_Specific_ID': 'center_specific_id',
        'MGH_ID': ('mgh_id', False, None, as_int),
        'Assay': 'assay',
        'CL_Organism': 'organism',
        'CL_Organ': 'organ',
        'CL_Tissue': 'tissue',
        'CL_Cell_Type': 'cell_type',
        'CL_Cell_Type_Detail': 'cell_type_detail',
        'CL_Donor_Sex': 'donor_sex',
        'CL_Donor_Age': ('donor_age_years', False, None, as_int),
        'CL_Donor_Ethnicity': 'donor_ethnicity',
        'CL_Donor_Health_Status': 'donor_health_status',
        'CL_Disease': 'disease',
        'CL_Disease_Detail': 'disease_detail',
        'CL_Growth_Properties': 'growth_properties',
        'CL_Genetic_Modification': 'genetic_modification',
        'CL_Related_Projects': 'related_projects',
        'CL_Recommended_Culture_Conditions':
            'recommended_culture_conditions',
        'CL_Verification_Reference_Profile':
            'verification_reference_profile',
        'CL_Known_Mutations': 'mutations_known',
        'CL_Mutations_Citations': 'mutations_citations',
        'CL_Molecular_Features': 'molecular_features',
        'CL_Relevant_Citations': 'relevant_citations',
        'CL_Reference_Source': 'reference_source',
        'CL_Reference_Source_ID': 'reference_source_id',
        'Reference Source URL': 'reference_source_url',
        'Usage Note': 'usage_note',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
        'Is Restricted': (
            'is_restricted', False, False, util.bool_converter),
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
                             all_sheet_columns_required=False)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]

            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = Cell(**initializer)
            cell.save()
            logger.info(str(('cell created:', cell)))
            rows += 1

            # create a default batch - 0
            CellBatch.objects.create(reagent=cell, batch_id=0)
        except Exception:
            # report through the logger using the mapped name; the first
            # sheet column is not guaranteed to hold it
            logger.error('Invalid Cell, name: %r, initializer: %r',
                         initializer.get('name'), initializer)
            # bare raise keeps the original traceback
            raise
def main(path):
    """
    Read in the smallmolecule batch info

    Every row of "sheet 1" becomes one SmallMoleculeBatch. The "facility_id"
    and "salt_id" columns are not stored on the batch; once both are known
    they are used to look up the SmallMolecule stored as "smallmolecule".

    Raises Exception when a required value is missing; lookup and save
    errors are logged and re-raised.
    """
    sheet_name = "sheet 1"
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])

    # NOTE: these db fields are not integers, but reading them as INT keeps
    # the spreadsheet values from being interpreted as floats
    as_int = lambda x: util.convertdata(x, int)

    property_names = ("model_field", "required", "default", "converter")
    column_definitions = util.fill_in_column_definitions(property_names, {
        "facility_id": ("facility_id", True, None, as_int),
        "salt_id": ("salt_id", True, None, as_int),
        "facility_batch_id": ("facility_batch_id", True, None, as_int),
        "provider": ("provider", True),
        "provider_catalog_id": "provider_catalog_id",
        "provider_sample_id": "provider_sample_id",
        "chemical_synthesis_reference": "chemical_synthesis_reference",
        "purity": "purity",
        "purity_method": "purity_method",
        "aqueous_solubility": "aqueous_solubility",
        "aqueous_solubility_unit": "aqueous_solubility_unit",
        "Date Data Received": (
            "date_data_received", False, None, util.date_converter),
        "Date Loaded": ("date_loaded", False, None, util.date_converter),
        "Date Publicly Available": (
            "date_publicly_available", False, None, util.date_converter),
    })

    # column ordinal -> column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(("cols: ", cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {"facility_id": None, "salt_id": None}

        for index, value in enumerate(cells):
            if index not in cols:
                continue
            column = cols[index]
            logger.debug(str(("read col: ", index, ", ", column)))
            required = column["required"]
            default = column["default"]
            converter = column["converter"]
            model_field = column["model_field"]

            logger.debug(str(("raw value", value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception("Field is required: %s, record: %d"
                                % (column["column_label"], rows))
            logger.debug(
                str(("model_field: ", model_field, ", value: ", value)))

            if model_field not in small_molecule_lookup:
                initializer[model_field] = value
                continue

            # identifier column: resolve the molecule once both ids are known
            small_molecule_lookup[model_field] = value
            if None in small_molecule_lookup.values():
                continue
            try:
                sm = SmallMolecule.objects.get(**small_molecule_lookup)
                initializer["smallmolecule"] = sm
            except Exception:
                logger.error(
                    str(("sm identifiers not found", small_molecule_lookup,
                         "row", rows + start_row + 2)))
                raise

        try:
            logger.debug(str(("initializer: ", initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(("smb created:", smb)))
            rows += 1
        except Exception as e:
            logger.error(
                str(("Invalid smallmolecule batch initializer: ",
                     initializer, "row", rows + start_row + 2, e)))
            raise
def main(path):
    """
    Read in the primary cell batch info.

    Loads the "Sheet1" worksheet of the workbook at `path` and saves one
    PrimaryCellBatch per data row.  The "Facility ID" cell is not stored on
    the batch: the text after "HMSL" is used to fetch the parent PrimaryCell,
    which is passed to the batch as `reagent`.

    Raises Exception if a required cell has no value.  Errors from the
    PrimaryCell lookup or from saving the batch are logged (with traceback)
    and re-raised.
    """
    sheet_name = "Sheet1"
    start_row = 1
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, start_row])

    properties = ("model_field", "required", "default", "converter")
    column_definitions = {
        # str.index raises ValueError when the cell does not contain "HMSL".
        "Facility ID": ("facility_id", True, None, lambda x: x[x.index("HMSL") + 4:]),
        "PC_Center_Batch_ID": ("batch_id", True, None, lambda x: util.convertdata(x, int)),
        "PC_Center_Specific_Code": "center_specific_code",
        "PC_Provider_Name": "provider_name",
        "PC_Provider_Catalog_ID": "provider_catalog_id",
        "PC_Provider_Batch_ID": "provider_batch_id",
        "PC_Source_Information": "source_information",
        "PC_Date_Received": "date_received",
        "PC_Quality_Verification": "quality_verification",
        "PC_Culture_Conditions": "culture_conditions",
        "PC_Passage_Number": ("passage_number", False, None, lambda x: util.convertdata(x, int)),
        "PC_Transient_Modification": "transient_modification",
        "Date Data Received": ("date_data_received", False, None, util.date_converter),
        "Date Loaded": ("date_loaded", False, None, util.date_converter),
        "Date Publicly Available": ("date_publicly_available", False, None, util.date_converter),
        "Most Recent Update": ("date_updated", False, None, util.date_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            required = col_def["required"]
            default = col_def["default"]
            converter = col_def["converter"]
            model_field = col_def["model_field"]

            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception(
                    "Field is required: %s, record: %d" % (col_def["column_label"], rows)
                )

            if model_field != "facility_id":
                initializer[model_field] = value
                continue

            # The facility id identifies the parent cell rather than being a
            # field of the batch.
            try:
                initializer["reagent"] = PrimaryCell.objects.get(facility_id=value)
            except Exception:
                # Catch Exception rather than everything so that Ctrl-C /
                # SystemExit are not reported as a missing primary cell.
                logger.exception("Primary Cell not found: %r, row: %d", value, rows + start_row + 1)
                raise

        try:
            logger.debug("initializer: %r", initializer)
            cell = PrimaryCellBatch(**initializer)
            cell.save()
            logger.debug("primary cell batch created: %r", cell)
            rows += 1
        except Exception:
            logger.exception(
                "Invalid Primary CellBatch initializer: %r, row: %d", initializer, rows + start_row + 1
            )
            raise
def main(path):
    """
    Read in the smallmolecule batch info.

    Loads the 'sheet 1' worksheet of the workbook at `path` and saves one
    SmallMoleculeBatch per data row.  The facility_id and salt_id cells are
    held back from the batch and, once both have a value, used to fetch the
    parent SmallMolecule, which is passed to the batch as `reagent`.

    Raises Exception if a required cell has no value.  Errors from the
    SmallMolecule lookup or from saving the batch are logged and re-raised.
    """
    sheet_name = 'sheet 1'
    start_row = 1
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, start_row])

    def as_int(raw):
        # NOTE: even though these db fields are not integers, converting the
        # read-in values to int makes sure they are not interpreted as floats.
        return util.convertdata(raw, int)

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'facility_id': ('facility_id', True, None, as_int),
        'salt_id': ('salt_id', True, None, as_int),
        'facility_batch_id': ('batch_id', True, None, as_int),
        'provider': ('provider_name', True),
        'provider_catalog_id': 'provider_catalog_id',
        'provider_sample_id': 'provider_batch_id',
        'chemical_synthesis_reference': 'chemical_synthesis_reference',
        'purity': 'purity',
        'purity_method': 'purity_method',
        'aqueous_solubility': 'aqueous_solubility',
        # FIXME: should warn the user if no unit is provided when
        # aqueous_solubility is provided
        'aqueous_solubility_unit': 'aqueous_solubility_unit',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)
    cols = util.find_columns(
        column_definitions, sheet.labels, all_sheet_columns_required=False)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id': None, 'salt_id': None}

        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug(str(('read col: ', col_index, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'], rows))
            logger.debug(str(('model_field: ', model_field, ', value: ', value)))

            # Ordinary fields go straight onto the batch.
            if model_field not in small_molecule_lookup:
                initializer[model_field] = value
                continue

            # Identifier fields: wait for the full pair, then resolve the
            # parent small molecule.
            small_molecule_lookup[model_field] = value
            if None in small_molecule_lookup.values():
                continue
            try:
                sm = SmallMolecule.objects.get(**small_molecule_lookup)
                initializer['reagent'] = sm
            except Exception:
                logger.error(str(('sm identifiers not found',
                                  small_molecule_lookup, 'row', rows + start_row + 2)))
                raise

        try:
            logger.debug(str(('initializer: ', initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(('smb created:', smb)))
            rows += 1
        except Exception as e:
            logger.error(str(("Invalid smallmolecule batch initializer: ",
                              initializer, 'row', rows + start_row + 2, e)))
            raise
def main(path):
    """
    Read in the Cell.

    Loads the 'HMS-LINCS cell line metadata' worksheet of the workbook at
    `path` and saves one Cell per data row, using the column definitions
    below to map sheet labels to model fields.

    Raises Exception if a required cell has no value.  Errors raised while
    creating or saving a Cell are logged together with the initializer and
    re-raised with their original traceback.
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, 1])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        # str.index raises ValueError when the cell does not contain 'HMSL'.
        'Facility ID': ('facility_id', True, None, lambda x: x[x.index('HMSL') + 4:]),
        'CL_Name': ('name', True),
        'CL_ID': 'cl_id',
        'CL_Alternate_Name': 'alternate_name',
        'CL_Alternate_ID': 'alternate_id',
        'CL_Center_Name': 'center_name',
        'CL_Center_Specific_ID': 'center_specific_id',
        'MGH_ID': ('mgh_id', False, None, lambda x: util.convertdata(x, int)),
        'Assay': 'assay',
        'CL_Provider_Name': 'provider_name',
        'CL_Provider_Catalog_ID': 'provider_catalog_id',
        'CL_Batch_ID': 'batch_id',
        'CL_Organism': 'organism',
        'CL_Organ': 'organ',
        'CL_Tissue': 'tissue',
        'CL_Cell_Type': 'cell_type',
        'CL_Cell_Type_Detail': 'cell_type_detail',
        'CL_Disease': 'disease',
        'CL_Disease_Detail': 'disease_detail',
        'CL_Growth_Properties': 'growth_properties',
        'CL_Genetic_Modification': 'genetic_modification',
        'CL_Related_Projects': 'related_projects',
        'CL_Recommended_Culture_Conditions': 'recommended_culture_conditions',
        'CL_Verification_Profile': 'verification_profile',
        'CL_Verification_Reference_Profile': 'verification_reference_profile',
        'CL_Mutations_Reference': 'mutations_reference',
        'CL_Mutations_Explicit': 'mutations_explicit',
        'CL_Organism_Gender': 'organism_gender',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False, util.bool_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug(str(('read col: ', col_index, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'], rows))
            logger.debug(str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = Cell(**initializer)
            cell.save()
            logger.info(str(('cell created:', cell)))
            rows += 1
        except Exception:
            # Report through the logger like the other importers, and show the
            # whole initializer: nothing guarantees the first sheet column is
            # the cell name.  Bare `raise` keeps the original traceback.
            logger.error(str(('Invalid Cell initializer: ', initializer)))
            raise
def main(path):
    """
    Read in the cell batch info.

    Loads the 'Sheet1' worksheet of the workbook at `path` and saves one
    CellBatch per data row.  The 'Facility ID' cell is not stored on the
    batch: the text after 'HMSL' is used to fetch the parent Cell, which is
    passed to the batch as `reagent`.

    Raises Exception if a required cell has no value.  Errors from the Cell
    lookup or from saving the batch are logged and re-raised.
    """
    sheet_name = 'Sheet1'
    start_row = 1
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, start_row])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        # str.index raises ValueError when the cell does not contain 'HMSL'.
        'Facility ID': ('facility_id', True, None, lambda x: x[x.index('HMSL') + 4:]),
        'CL_Batch_ID': ('batch_id', True, None, lambda x: util.convertdata(x, int)),
        'CL_Provider_Name': 'provider_name',
        'CL_Provider_Batch_ID': 'provider_batch_id',
        'CL_Provider_Catalog_ID': 'provider_catalog_id',
        'CL_Quality_Verification': 'quality_verification',
        'CL_Transient_Modification': 'transient_modification',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug(str(('read col: ', col_index, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'], rows))
            logger.debug(str(('model_field: ', model_field, ', value: ', value)))

            if model_field != 'facility_id':
                initializer[model_field] = value
                continue

            # The facility id identifies the parent cell rather than being a
            # field of the batch.
            try:
                initializer['reagent'] = Cell.objects.get(facility_id=value)
            except Exception:
                # Catch Exception rather than everything so that Ctrl-C /
                # SystemExit are not reported as a missing cell.
                logger.error(
                    str(("Cell not found", value, 'row', rows + start_row + 2)))
                raise

        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = CellBatch(**initializer)
            cell.save()
            logger.debug(str(('cell created:', cell)))
            rows += 1
        except Exception as e:
            logger.error(
                str(("Invalid CellBatch initializer: ", initializer, 'row',
                     rows + start_row + 2, e)))
            raise
def main(path):
    """
    Read in the Protein.

    Loads the "HMS-LINCS Kinases" worksheet of the workbook at `path` and
    saves one Protein per data row, using the column definitions below to map
    sheet labels to model fields.

    Raises Exception if a required cell has no value.  Errors raised while
    creating or saving a Protein are logged with the initializer and
    re-raised.
    """
    sheet_name = "HMS-LINCS Kinases"
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, 1])

    date_field = util.date_converter
    properties = ("model_field", "required", "default", "converter")
    column_definitions = {
        "PP_Name": ("name", True),
        "PP_LINCS_ID": ("lincs_id", True, None, lambda x: x[x.index("HMSL") + 4:]),
        "PP_UniProt_ID": "uniprot_id",
        "PP_Alternate_Name": "alternate_name",
        "PP_Alternate_Name[2]": "alternate_name_2",
        "PP_Provider": "provider",
        "PP_Provider_Catalog_ID": "provider_catalog_id",
        "PP_Batch_ID": "batch_id",
        "PP_Amino_Acid_Sequence": "amino_acid_sequence",
        "PP_Gene_Symbol": "gene_symbol",
        "PP_Gene_ID": "gene_id",
        "PP_Protein_Source": "protein_source",
        "PP_Protein_Form": "protein_form",
        "PP_Protein_Purity": "protein_purity",
        "PP_Protein_Complex": "protein_complex",
        "PP_Isoform": "isoform",
        "PP_Protein_Type": "protein_type",
        "PP_Source_Organism": "source_organism",
        "PP_Reference": "reference",
        "Date Data Received": ("date_data_received", False, None, date_field),
        "Date Loaded": ("date_loaded", False, None, date_field),
        "Date Publicly Available": ("date_publicly_available", False, None, date_field),
        "Is Restricted": ("is_restricted", False, False),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(("cols: ", cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug(str(("read col: ", col_index, ", ", col_def)))
            required = col_def["required"]
            default = col_def["default"]
            converter = col_def["converter"]
            model_field = col_def["model_field"]

            logger.debug(str(("raw value", value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception(
                    "Field is required: %s, record: %d" % (col_def["column_label"], rows)
                )
            logger.debug(str(("model_field: ", model_field, ", value: ", value)))
            initializer[model_field] = value

        try:
            logger.debug(str(("initializer: ", initializer)))
            protein = Protein(**initializer)
            protein.save()
            logger.info(str(("protein created: ", protein)))
            rows += 1
        except Exception:
            logger.error(str(("Invalid protein initializer: ", initializer)))
            raise
def read_metadata(meta_sheet):
    """
    Build a model initializer dict from a metadata sheet.

    `meta_sheet` must provide `nrows` and `row_values(index)`.  Row 0 is
    skipped; every following row holds a field label in its first cell and
    the field's value in its second cell.  Each value is passed through the
    field's converter (if any), replaced by the field's default when empty,
    and stored under the field's model_field name.

    Returns the dict mapping model_field -> value.
    Raises Exception if a required field has no value.
    """
    properties = ('model_field', 'required', 'default', 'converter')
    field_definitions = {
        'Lead Screener First': 'lead_screener_firstname',
        'Lead Screener Last': 'lead_screener_lastname',
        'Lead Screener Email': 'lead_screener_email',
        'Lab Head First': 'lab_head_firstname',
        'Lab Head Last': 'lab_head_lastname',
        'Lab Head Email': 'lab_head_email',
        'Title': 'title',
        'Facility ID': (
            'facility_id', True, None, lambda x: util.convertdata(x, int)),
        'Summary': 'summary',
        'Protocol': 'protocol',
        'References': 'protocol_references',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False, util.bool_converter),
        'Dataset Type': ('dataset_type', False),
        'Bioassay': ('bioassay', False),
        'Dataset Keywords': ('dataset_keywords', False),
        'Usage Message': ('usage_message', False),
        'Dataset Data URL': ('dataset_data_url', False),
        'Associated Publication': ('associated_publication', False),
        'Associated Project Summary': ('associated_project_summary', False),
    }

    # Read every row after the first once; the labels live in column 0.
    data_rows = [meta_sheet.row_values(n) for n in range(1, meta_sheet.nrows)]
    sheet_labels = [cells[0] for cells in data_rows]

    # Expand the shorthand definitions into full dicts, then map each row
    # ordinal to its definition.
    field_definitions = util.fill_in_column_definitions(
        properties, field_definitions)
    cols = util.find_columns(field_definitions, sheet_labels,
                             all_column_definitions_required=False)

    initializer = {}
    for i, cells in enumerate(data_rows):
        field = cols[i]
        value = cells[1]
        logger.debug('Metadata raw value %r', value)
        required = field['required']
        default = field['default']
        converter = field['converter']
        model_field = field['model_field']

        if converter:
            value = converter(value)
        if not value and default is not None:
            value = default
        if not value and required:
            # Report the 0-based index of the metadata row.  Formatting the
            # row's cell list with %d would raise TypeError and hide this
            # message.
            raise Exception(
                'Field is required: %s, record: %d'
                % (field['column_label'], i))
        logger.debug('model_field: %s, value: %r', model_field, value)
        initializer[model_field] = value

    return initializer
def read_metadata(path):
    """
    Read the 'Meta' sheet of the workbook at `path` into an initializer dict.

    Each row of the sheet holds a field label in its first cell and the
    field's value in its second cell.  Each value is passed through the
    field's converter (if any), replaced by the field's default when it is
    None, and stored under the field's model_field name.

    Returns the dict mapping model_field -> value.
    Raises Exception if a required field has no value.
    """
    sheetname = 'Meta'
    # Note: the header row is skipped by default.
    metaSheet = iu.readtable([path, sheetname])

    # Label -> model field mapping.
    properties = ('model_field', 'required', 'default', 'converter')
    field_definitions = {
        'Lead Screener First': 'lead_screener_firstname',
        'Lead Screener Last': 'lead_screener_lastname',
        'Lead Screener Email': 'lead_screener_email',
        'Lab Head First': 'lab_head_firstname',
        'Lab Head Last': 'lab_head_lastname',
        'Lab Head Email': 'lab_head_email',
        'Title': 'title',
        'Facility ID': ('facility_id', True, None,
                        lambda x: util.convertdata(x, int)),
        'Summary': 'summary',
        'Protocol': 'protocol',
        'References': 'protocol_references',
        'Date Data Received': ('date_data_received', False, None,
                               util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None,
                                    util.date_converter),
        'Most Recent Update': ('date_updated', False, None,
                               util.date_converter),
        'Is Restricted': ('is_restricted', False, False, util.bool_converter),
        'Dataset Type': ('dataset_type', False),
        'Bioassay': ('bioassay', False),
        'Dataset Keywords': ('dataset_keywords', False),
        'Usage Message': ('usage_message', False),
    }

    # Convert every row once; the labels live in the first cell.
    meta_rows = [util.make_row(row) for row in metaSheet]
    sheet_labels = [cells[0] for cells in meta_rows]

    # Expand the shorthand definitions into full dicts, then map each row
    # ordinal to its definition.
    field_definitions = util.fill_in_column_definitions(
        properties, field_definitions)
    cols = util.find_columns(field_definitions, sheet_labels,
                             all_column_definitions_required=False)

    initializer = {}
    for i, cells in enumerate(meta_rows):
        field = cols[i]
        value = cells[1]
        logger.debug(str(('read col: ', i, ', ', field)))
        required = field['required']
        default = field['default']
        converter = field['converter']
        model_field = field['model_field']

        logger.debug(str(('raw value', value)))
        if converter is not None:
            value = converter(value)
        if value is None and default is not None:
            value = default
        if value is None and required:
            # Report the 0-based index of the metadata row.  Formatting the
            # row object itself with %d would raise TypeError and hide this
            # message.
            raise Exception('Field is required: %s, record: %d'
                            % (field['column_label'], i))
        logger.debug(str(('model_field: ', model_field, ', value: ', value)))
        initializer[model_field] = value

    return initializer
def main(path):
    """
    Read in the Antibody Batches.

    Loads the 'Sheet1' worksheet of the workbook at `path` and saves one
    AntibodyBatch per data row.  The 'AR_Center_Specific_ID' cell is not
    stored on the batch: the text after 'HMSL' is used to fetch the parent
    Antibody, which is passed to the batch as `reagent`.

    Raises Exception if a required cell has no value.  Errors raised while
    creating or saving a batch are logged with the initializer and re-raised.
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 0])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        # str.index raises ValueError when the cell does not contain 'HMSL'.
        'AR_Center_Specific_ID': ('antibody_facility_id', True, None,
                                  lambda x: x[x.index('HMSL') + 4:]),
        'AR_Batch_ID': ('batch_id', True, None,
                        lambda x: util.convertdata(x, int)),
        'AR_Provider_Name': 'provider_name',
        'AR_Provider_Catalog_ ID': 'provider_catalog_id',
        'AR_Provider_Batch_ID': 'provider_batch_id',
        'AR_Antibody_Purity': 'antibody_purity',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    # `rows` counts the batches saved so far, i.e. it is the 0-based index of
    # the data row currently being processed.
    rows = 0
    logger.debug('cols: %s', cols)
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug('read col: %d: %s', col_index, col_def)
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug('raw value %r', value)
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'], rows))
            logger.debug('model_field: %s, converted value %r', model_field, value)
            initializer[model_field] = value

        try:
            logger.debug('initializer: %s', initializer)
            # The facility id identifies the parent antibody rather than being
            # a field of the batch, so take it out of the initializer.
            antibody_facility_id = initializer.pop('antibody_facility_id', None)
            if antibody_facility_id:
                try:
                    initializer['reagent'] = Antibody.objects.get(
                        facility_id=antibody_facility_id)
                except ObjectDoesNotExist:
                    # Report the data row, not the last column index of the
                    # inner loop.
                    # NOTE(review): processing continues and the batch is
                    # created without a reagent; confirm whether this should
                    # abort the import instead.
                    logger.error(
                        'AR_Center_Specific_ID: "%s" does not exist, row: %d',
                        antibody_facility_id, rows)
            antibody_batch = AntibodyBatch(**initializer)
            antibody_batch.save()
            logger.info('antibody batch created: %s', antibody_batch)
            rows += 1
        except Exception:
            logger.error("Invalid antibody_batch initializer: %s", initializer)
            raise
def main(path):
    """
    Read in the OtherReagent.

    Loads the 'Sheet1' worksheet of the workbook at `path` and saves one
    OtherReagent per data row, then creates a default OtherReagentBatch with
    batch_id 0 for it.

    Raises Exception if a required cell has no value.  Errors raised while
    saving the reagent or creating its default batch are logged with the
    initializer and re-raised.
    """
    sheet_name = 'Sheet1'
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, 1])

    date_field = util.date_converter
    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'OR_ID': 'lincs_id',
        'Facility ID': ('facility_id', True),
        'OR_Alternate_ID': 'alternate_id',
        'OR_Primary_Name': ('name', True),
        'OR_Alternate_Name': 'alternative_names',
        'OR_Role': 'role',
        'OR_Reference': 'reference',
        'Date Data Received': ('date_data_received', False, None, date_field),
        'Date Loaded': ('date_loaded', False, None, date_field),
        'Date Publicly Available': ('date_publicly_available', False, None, date_field),
        'Most Recent Update': ('date_updated', False, None, date_field),
        'Is Restricted': ('is_restricted', False, False),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug(str(('read col: ', col_index, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'], rows))
            logger.debug(str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            reagent = OtherReagent(**initializer)
            reagent.save()
            logger.info(str(('OtherReagent created: ', reagent)))
            rows += 1
            # Every reagent gets a default batch, numbered 0.
            OtherReagentBatch.objects.create(reagent=reagent, batch_id=0)
        except Exception:
            logger.error(str(("Invalid OtherReagent initializer: ", initializer)))
            raise
def read_metadata(meta_sheet):
    """
    Build a model initializer dict from a metadata sheet.

    `meta_sheet` must provide `nrows` and `row_values(index)`.  Row 0 is
    skipped; every following row holds a field label in its first cell and
    the field's value in its second cell.  Each value is passed through the
    field's converter (if any), replaced by the field's default when empty,
    and stored under the field's model_field name.

    Returns the dict mapping model_field -> value.
    Raises Exception if a required field has no value.
    """
    properties = ('model_field', 'required', 'default', 'converter')
    field_definitions = {
        'Lead Screener First': 'lead_screener_firstname',
        'Lead Screener Last': 'lead_screener_lastname',
        'Lead Screener Email': 'lead_screener_email',
        'Lab Head First': 'lab_head_firstname',
        'Lab Head Last': 'lab_head_lastname',
        'Lab Head Email': 'lab_head_email',
        'Title': 'title',
        'Facility ID': ('facility_id', True, None,
                        lambda x: util.convertdata(x, int)),
        'Summary': 'summary',
        'Protocol': 'protocol',
        'References': 'protocol_references',
        'Date Data Received': ('date_data_received', False, None,
                               util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None,
                                    util.date_converter),
        'Most Recent Update': ('date_updated', False, None,
                               util.date_converter),
        'Is Restricted': ('is_restricted', False, False, util.bool_converter),
        'Dataset Type': ('dataset_type', False),
        'Bioassay': ('bioassay', False),
        'Dataset Keywords': ('dataset_keywords', False),
        'Usage Message': ('usage_message', False),
        'Associated Publication': ('associated_publication', False),
        'Associated Project Summary': ('associated_project_summary', False),
    }

    # Read every row after the first once; the labels live in column 0.
    first_data_row = 1
    data_rows = []
    sheet_labels = []
    for row_index in range(first_data_row, meta_sheet.nrows):
        cells = meta_sheet.row_values(row_index)
        data_rows.append(cells)
        sheet_labels.append(cells[0])

    # Expand the shorthand definitions into full dicts, then map each row
    # ordinal to its definition.
    field_definitions = util.fill_in_column_definitions(
        properties, field_definitions)
    cols = util.find_columns(field_definitions, sheet_labels,
                             all_column_definitions_required=False)

    initializer = {}
    for i, cells in enumerate(data_rows):
        field = cols[i]
        value = cells[1]
        logger.debug('Metadata raw value %r', value)
        required = field['required']
        default = field['default']
        converter = field['converter']
        model_field = field['model_field']

        if converter:
            value = converter(value)
        if not value and default is not None:
            value = default
        if not value and required:
            # Report the 0-based index of the metadata row.  Formatting the
            # row's cell list with %d would raise TypeError and hide this
            # message.
            raise Exception('Field is required: %s, record: %d'
                            % (field['column_label'], i))
        logger.debug('model_field: %s, value: %r', model_field, value)
        initializer[model_field] = value

    return initializer
def main(path):
    """
    Read in the cell batch info.

    Loads the 'Sheet1' worksheet of the workbook at `path` and saves one
    CellBatch per data row.  The 'Facility ID' cell is not stored on the
    batch: the text after 'HMSL' is used to fetch the parent Cell, which is
    passed to the batch as `reagent`.

    Raises Exception if a required cell has no value.  Errors from the Cell
    lookup or from saving the batch are logged and re-raised.
    """
    sheet_name = 'Sheet1'
    start_row = 1
    # Note: the header row is skipped by default.
    sheet = iu.readtable([path, sheet_name, start_row])

    def strip_hmsl_prefix(raw):
        # Keep only the text after 'HMSL'; str.index raises ValueError when
        # the marker is absent.
        return raw[raw.index('HMSL') + 4:]

    def as_int(raw):
        return util.convertdata(raw, int)

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'Facility ID': ('facility_id', True, None, strip_hmsl_prefix),
        'CL_Batch_ID': ('batch_id', True, None, as_int),
        'CL_Provider_Name': 'provider_name',
        'CL_Provider_Batch_ID': 'provider_batch_id',
        'CL_Provider_Catalog_ID': 'provider_catalog_id',
        'CL_Quality_Verification': 'quality_verification',
        'CL_Transient_Modification': 'transient_modification',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug(str(('read col: ', col_index, ', ', col_def)))
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d' % (
                    col_def['column_label'], rows))
            logger.debug(str(('model_field: ', model_field, ', value: ', value)))

            if model_field == 'facility_id':
                # The facility id identifies the parent cell rather than
                # being a field of the batch.
                try:
                    cell = Cell.objects.get(facility_id=value)
                    initializer['reagent'] = cell
                except Exception:
                    # Catch Exception rather than everything so that Ctrl-C /
                    # SystemExit are not reported as a missing cell.
                    logger.error(str(("Cell not found", value, 'row', rows + start_row + 2)))
                    raise
            else:
                initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = CellBatch(**initializer)
            cell.save()
            logger.debug(str(('cell created:', cell)))
            rows += 1
        except Exception as e:
            logger.error(str((
                "Invalid CellBatch initializer: ", initializer,
                'row', rows + start_row + 2, e)))
            raise
def main(path):
    """
    Read in the other reagent batch info.

    Loads the 'sheet 1' worksheet of the workbook at `path` and saves one
    OtherReagentBatch per data row.  The facility_id cell is not stored on
    the batch: it is used to fetch the parent OtherReagent, which is passed
    to the batch as `reagent`.

    Raises Exception if a required cell has no value.  Errors raised while
    creating or saving a batch are logged with the initializer and re-raised.
    """
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'facility_id': (
            'facility_id', True, None, lambda x: util.convertdata(x, int)),
        'facility_batch_id': (
            'batch_id', True, None, lambda x: util.convertdata(x, int)),
        'provider': ('provider_name', False),
        'provider_catalog_id': 'provider_catalog_id',
        'provider_sample_id': 'provider_batch_id',
        'Date Data Received': (
            'date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': (
            'date_updated', False, None, util.date_converter),
    }
    # Expand the shorthand definitions into full dicts, then map each sheet
    # column ordinal to its definition.
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels,
                             all_sheet_columns_required=False)

    # `rows` counts the batches saved so far, i.e. it is the 0-based index of
    # the data row currently being processed.
    rows = 0
    logger.debug('cols: %s', cols)
    for row in sheet:
        cells = util.make_row(row)
        initializer = {}
        for col_index, value in enumerate(cells):
            if col_index not in cols:
                continue
            col_def = cols[col_index]
            logger.debug('read col: %d: %s', col_index, col_def)
            required = col_def['required']
            default = col_def['default']
            converter = col_def['converter']
            model_field = col_def['model_field']

            logger.debug('raw value %r', value)
            if converter is not None:
                value = converter(value)
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (col_def['column_label'], rows))
            logger.debug('model_field: %s, converted value %r', model_field, value)
            initializer[model_field] = value

        try:
            logger.debug('initializer: %s', initializer)
            # The facility id identifies the parent reagent rather than being
            # a field of the batch, so take it out of the initializer.
            facility_id = initializer.pop('facility_id', None)
            try:
                initializer['reagent'] = OtherReagent.objects.get(
                    facility_id=facility_id)
            except ObjectDoesNotExist:
                # Report the data row, not the last column index of the inner
                # loop.
                # NOTE(review): processing continues and the batch is created
                # without a reagent; confirm whether this should abort the
                # import instead.
                logger.error('facility_id: "%s" does not exist, row: %d',
                             facility_id, rows)
            batch = OtherReagentBatch(**initializer)
            batch.save()
            logger.debug('batch created: %s', batch)
            rows += 1
        except Exception:
            logger.error("Invalid other_reagent_batch initializer: %s", initializer)
            raise
def main(import_file, file_directory, deploy_dir):
    """
    Read in the qc events for batches
    - version 1 - for small molecule batches

    For every row of 'Sheet1' in `import_file`:
    - convert the mapped columns into a dict of model field values,
    - copy each file named in the file columns from `file_directory` into
      the deploy directory (`deploy_dir`, or
      settings.STATIC_AUTHENTICATED_FILE_DIR when `deploy_dir` is empty),
      replacing any file already deployed at that path,
    - save a QCEvent plus one QCAttachedFile per deployed file.

    Raises:
        Exception: when a required field has no value, a referenced file or
            the deploy directory does not exist, a copy did not produce a
            file, or (re-raised after logging) a record cannot be saved.
    """
    sheet_name = 'Sheet1'
    start_row = 0
    # Note, skipping the header row by default
    sheet = iu.readtable([import_file, sheet_name, start_row])

    def to_int(x):
        return util.convertdata(x, int)

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'facility_id': ('facility_id_for', True, None, to_int),
        'salt_id': ('salt_id_for', False, None, to_int),
        'batch_id': ('batch_id_for', True, None, to_int),
        'QC event date': ('date', True, None, util.date_converter),
        'outcome': ('outcome', True),
        'comment': 'comment',
        'is_restricted': (
            'is_restricted', False, False, util.bool_converter),
        'file1': 'file1',
        'file2': 'file2',
        'file3': 'file3',
        'file4': 'file4',
        'file5': 'file5',
    }
    # convert the labels to fleshed out dict's, with strategies for
    # optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column
    # definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        r = util.make_row(row)
        # store each row in a dict
        _dict = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            column = cols[i]
            logger.debug(str(('read col: ', i, ', ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            logger.debug(str(('raw value', value)))
            if converter is not None:
                value = converter(value)
            # compare against None explicitly: False is a valid value
            if value is None and default is not None:
                value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d'
                                % (column['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            _dict[model_field] = value

        logger.debug(str(('dict: ', _dict)))

        files_to_attach = []
        for file_index in range(10):
            filename_prop = 'file%s' % file_index
            fileprop = _dict.get(filename_prop, None)
            if not fileprop:
                continue

            filepath = os.path.join(file_directory, fileprop)
            if not os.path.exists(filepath):
                raise Exception(str(
                    ('file does not exist:', filepath,
                     'row', rows + start_row)))
            filename = os.path.basename(filepath)
            # Everything before the trailing file name, separator included.
            # rindex anchors on the last occurrence, so a directory whose
            # name contains the file name (e.g. 'qc/qc') is kept intact.
            relative_path = fileprop[:fileprop.rindex(filename)]

            # Move the file
            dest_dir = deploy_dir
            if not dest_dir:
                dest_dir = settings.STATIC_AUTHENTICATED_FILE_DIR
            if not os.path.isdir(dest_dir):
                raise Exception(str(
                    ('no such deploy directory, please create it',
                     dest_dir)))
            if relative_path:
                dest_dir = os.path.join(dest_dir, relative_path)
                if not os.path.exists(dest_dir):
                    os.makedirs(dest_dir)
            deployed_path = os.path.join(dest_dir, filename)

            logger.debug(str(('deploy', filepath, deployed_path)))
            # an earlier deployment of the same file is replaced
            if os.path.exists(deployed_path):
                os.remove(deployed_path)
            copy(filepath, deployed_path)
            if not os.path.isfile(deployed_path):
                raise Exception(str(
                    ('could not deploy to', deployed_path)))
            logger.debug(
                str(('successfully deployed to', deployed_path)))

            files_to_attach.append((filename, relative_path))

        initializer = None
        try:
            # create the qc record
            initializer = {
                key: _dict[key] for key in [
                    'facility_id_for', 'salt_id_for', 'batch_id_for',
                    'outcome', 'comment', 'date'
                ]
            }
            qc_event = QCEvent(**initializer)
            qc_event.save()
            logger.debug(str(('saved', qc_event)))

            # create attached file records
            for (filename, relative_path) in files_to_attach:
                initializer = {
                    'qc_event': qc_event,
                    'filename': filename,
                    'relative_path': relative_path,
                    'is_restricted': _dict['is_restricted']
                }
                qc_attached_file = QCAttachedFile(**initializer)
                qc_attached_file.save()
                logger.debug(
                    str(('created qc attached file', qc_attached_file)))

            rows += 1

        except Exception as e:
            # initializer holds whichever record was being built last
            logger.error(str(
                ("Invalid initializer: ", initializer,
                 'row', rows + start_row + 2, e)))
            raise
def main(path):
    """
    Read in the Data Working Group sheets

    The CSV file at `path` is read with the first line as column labels.
    All existing FieldInformation records are deleted, then one
    FieldInformation is saved per data row.  Rows with an empty `field`
    value are logged and skipped.  When a row names a table, the table must
    be a model of APPNAME and the field must be one of that model's fields.

    Raises:
        Exception: when a required field has no value, when the table or
            field is unknown, or (re-raised after logging) when a record
            cannot be built/saved.
    """
    logger.info(str(('read field information file', path)))

    def to_int(x):
        return util.convertdata(x, int)

    properties = ('model_field', 'required', 'default', 'converter')
    # NOTE(review): 'Ontologies / references considered' and
    # 'Link to ontology / reference' both map to 'ontology_reference', so
    # whichever column comes later in the sheet wins - confirm intended.
    column_definitions = {
        'table': 'table',
        'field': 'field',
        'alias': 'alias',
        'queryset': 'queryset',
        'show in detail': (
            'show_in_detail', True, False, util.bool_converter),
        'show in list': (
            'show_in_list', True, False, util.bool_converter),
        'show_as_extra_field': (
            'show_as_extra_field', False, False, util.bool_converter),
        'is_lincs_field': (
            'is_lincs_field', True, False, util.bool_converter),
        'is_unrestricted': (
            'is_unrestricted', False, False, util.bool_converter),
        'list_order': ('list_order', True, None, to_int),
        'detail_order': ('detail_order', True, None, to_int),
        'use_for_search_index': (
            'use_for_search_index', True, False, util.bool_converter),
        'Data Working Group version': 'dwg_version',
        'Unique ID': ('unique_id', True),
        'DWG Field Name': 'dwg_field_name',
        'HMS Field Name': 'hms_field_name',
        'Related to': 'related_to',
        'Description': 'description',
        'Importance (1: essential; 2: desirable / recommended; 3: optional)':
            'importance',
        'Comments': 'comments',
        'Ontologies / references considered': 'ontology_reference',
        'Link to ontology / reference': 'ontology_reference',
        'Additional Notes (for development)': 'additional_notes',
    }
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    with open(path) as f:
        reader = csv.reader(f)
        # the first line holds the column labels
        labels = next(reader)
        cols = util.find_columns(
            column_definitions, labels, all_sheet_columns_required=False)

        logger.info('delete current table')
        FieldInformation.objects.all().delete()

        for j, row in enumerate(reader):
            logger.debug('row %d: %s', j, row)
            initializer = {}
            for i, value in enumerate(row):
                if i not in cols:
                    logger.info(str(('column out of range', j + 1, i)))
                    continue
                column = cols[i]
                logger.debug(str(('read col: ', i, ', ', column)))
                required = column['required']
                default = column['default']
                converter = column['converter']
                model_field = column['model_field']

                # Todo, refactor to a method
                logger.debug(str(('raw value', value)))
                if converter:
                    logger.debug(str(('using converter', converter, value)))
                    value = converter(value)
                    logger.debug(str(('converted', value)))
                # Note: must check the value against None, as False is a
                # valid value
                if value is None and default is not None:
                    value = default
                if value is None and required is True:
                    raise Exception('Field is required: %s, record: %d'
                                    % (column['column_label'], j + 1))
                logger.debug(
                    str(('model_field: ', model_field, ', value: ', value)))
                initializer[model_field] = value

            try:
                logger.debug(str(('initializer: ', initializer)))
                if not initializer['field']:
                    logger.warning(str((
                        'Note: table entry has no field definition (will be skipped)',
                        initializer, 'current row:', j + 1)))
                    continue
                lfi = FieldInformation(**initializer)
                # check if the table/field exists
                if lfi.table:
                    table = models.get_model(APPNAME, lfi.table)
                    if not table:
                        raise Exception(str(('unknown table', lfi.table)))
                    field_names = [fld.name for fld in table._meta.fields]
                    if lfi.field not in field_names:
                        raise Exception(str(('unknown field: ', lfi.field)))
                lfi.save()
                logger.info(str(('fieldInformation created:', lfi)))
            except Exception as e:
                logger.error(str((
                    "Invalid fieldInformation, initializer so far: ",
                    initializer, 'current row:', j + 1, e)))
                # bare raise keeps the original traceback
                raise