Example 1
def read_metadata(path):
    """
    Read in the DataSets, Datacolumns, and Data sheets.  In the Data sheet, rows are DataRecords, and columns are DataPoints
    """
    # Read in the DataSet
    sheetname = 'Meta'
    # Define the Column Names -> model fields mapping
    labels = {'Lead Screener First': 'lead_screener_firstname',
              'Lead Screener Last': 'lead_screener_lastname',
              'Lead Screener Email': 'lead_screener_email',
              'Lab Head First': 'lab_head_firstname',
              'Lab Head Last': 'lab_head_lastname',
              'Lab Head Email': 'lab_head_email',
              'Title': 'title',
              'Facility ID': 'facility_id',
              'Summary': 'summary',
              'Protocol': 'protocol',
              'References': 'protocol_references'}
    
    metaSheet = iu.readtable([path, sheetname]) # Note, skipping the header row by default
    metaData = {}
    for row in metaSheet:
        rowAsUnicode = util.make_row(row)
        for key,value in labels.items():
            if re.match(key, rowAsUnicode[0], re.M|re.I):
                if key == 'Facility ID':
                    metaData[value] = util.convertdata(rowAsUnicode[1],int)
                else:
                    metaData[value] = rowAsUnicode[1]
    assert len(metaData) == len(labels), 'Meta data sheet does not contain the necessary keys, expected: %s, read: %s' % (labels, metaData)
    
    return metaData            
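These loaders lean on a handful of helpers (iu.readtable, util.make_row, util.convertdata) that are not included in the listings. A minimal sketch of what util.convertdata is assumed to do for the 'Facility ID' conversion above: blank cells become None, and non-blank values are coerced with the given type or callable (a guess at the behavior, not the project's implementation):

def convertdata(value, converter=None):
    # Hypothetical stand-in for util.convertdata: blank cells map to None,
    # otherwise the value is coerced with the supplied type/callable.
    if value is None:
        return None
    if hasattr(value, 'strip') and value.strip() == '':
        return None
    if converter is None:
        return value
    if converter is int:
        # xlrd returns numeric cells as floats, so go through float first
        # to make u'14.0' convert cleanly to 14.
        return int(float(value))
    return converter(value)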
Example 2
def main(path):
    """
    Read in the Protein
    """
    sheet_name = 'HMS-LINCS Kinases'
    labels = {
        'PP_Name': 'name',
        'PP_LINCS_ID': 'lincs_id',
        'PP_UniProt_ID': 'uniprot_id',
        'PP_Alternate_Name': 'alternate_name',
        'PP_Provider': 'provider',
        'PP_Provider_Catalog_ID': 'provider_catalog_id',
        'PP_Batch_ID': 'batch_id',
        'PP_Amino_Acid_Sequence': 'amino_acid_sequence',
        'PP_Gene_Symbol': 'gene_symbol',
        'PP_Gene_ID': 'gene_id',
        'PP_Protein_Source': 'protein_source',
        'PP_Protein_Form': 'protein_form',
        'PP_Protein_Purity': 'protein_purity',
        'PP_Protein_Complex': 'protein_complex',
        'PP_Isoform': 'isoform',
        'PP_Protein_Type': 'protein_type',
        'PP_Source_Organism': 'source_organism',
        'PP_Reference': 'reference'
    }

    converters = {'lincs_id': lambda x: x[x.index('HMSL') + 4:]}

    sheet = iu.readtable([path, sheet_name,
                          1])  # Note, skipping the header row by default
    cols = {}
    # first put the label row in (it contains the worksheet column, and it's unique)
    print 'labels: ', sheet.labels
    for i, label in enumerate(sheet.labels):
        if label in labels:
            cols[i] = labels[label]
        else:
            print 'Note: column label not found:', label
            #raise

    rows = 0
    i = 0

    print 'cols: ', cols
    proteins = {}
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            if cols[i] in converters:
                value = converters[cols[i]](value)
            dict[cols[i]] = value
        try:
            protein = Protein(**dict)
            protein.save()
            rows += 1
        except Exception, e:
            print "Invalid Protein, name: ", r[0]
            raise
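The lincs_id converter above strips everything up to and including the 'HMSL' prefix from identifiers such as 'HMSL10001'. Note that str.index raises ValueError when the prefix is missing, so a malformed cell aborts the import at that row. In isolation (illustrative value only):

strip_hmsl = lambda x: x[x.index('HMSL') + 4:]

assert strip_hmsl('HMSL10001') == '10001'
# strip_hmsl('10001') would raise ValueError: substring not found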
Example 3
def main(path):
    """
    Read in the Library and LibraryMapping sheets
    """
    libraries = readLibraries(path, 'Library')

    labels = {
        'Facility': 'facility_id',
        'Salt': 'sm_salt',
        'Batch': 'facility_batch_id',
        'Plate': 'plate',
        'Well': 'well',
        'Library Name': 'short_name',
        'Concentration': 'concentration',
        'Concentration Unit': 'concentration_unit'
    }

    small_molecule_lookup = ('facility_id', 'sm_salt', 'facility_batch_id')
    sheet = iu.readtable([path, 'LibraryMapping'])

    #dict to map spreadsheet fields to terms
    cols = {}
    # first put the label row in (it contains the worksheet column, and it's unique)
    for i, label in enumerate(sheet.labels):
        if label in labels:
            cols[labels[label]] = i
        else:
            print 'Note: column label not found:', label
    rows = 0
    for row in sheet:
        r = util.make_row(row)
        # small molecule
        dict = {}
        for field in small_molecule_lookup:
            dict[field] = util.convertdata(r[cols[field]], int)
        try:
            dict['facility_id'] = 'HMSL' + str(
                dict['facility_id']
            )  # TODO: convert all hmsl id's to integers!!
            sm = SmallMolecule.objects.get(**dict)
        except Exception, e:
            print "Invalid small molecule identifiers: ", dict
            raise
        short_name = r[cols['short_name']]
        if short_name not in libraries:
            print "Library not found: ", short_name
            raise
        lm = {}
        lm['concentration'] = util.convertdata(r[cols['concentration']], float)
        lm['concentration_unit'] = util.convertdata(
            r[cols['concentration_unit']], None)
        lm['plate'] = util.convertdata(r[cols['plate']], int)
        lm['well'] = r[cols['well']]
        lm['small_molecule'] = sm
        lm['library'] = libraries[short_name]
        lm = LibraryMapping(**lm)
        lm.save()
        rows += 1
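util.make_row, called at the top of every row loop, is likewise not shown in these listings. Presumably it flattens an xlrd row of Cell objects into plain stripped unicode strings so the loaders can index and compare cells; a rough Python 2 sketch of that assumed behavior:

def make_row(row):
    # Hypothetical sketch of util.make_row: turn xlrd Cell objects into
    # stripped unicode strings (empty cells become u'').
    values = []
    for cell in row:
        value = getattr(cell, 'value', cell)
        if value is None:
            values.append(u'')
        else:
            values.append(unicode(value).strip())
    return values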
Example 4
def readLibraries(path, sheetName):
    
    sheet = iu.readtable([path, sheetName]) # Note, skipping the header row by default
    # dict to map spreadsheet fields to the Library fields
    properties = ('model_field','required','default','converter')
    date_parser = lambda x : util.convertdata(x,date)
    column_definitions = {'Name': ('name',True), # TODO use the model to determine if req'd
                          'ShortName': ('short_name',True),
                          'Library Type':'type',
                          'Date First Plated': ('date_first_plated',False,None,date_parser),
                          'Date Data Received':('date_data_received',False,None,date_parser),
                          'Date Loaded': ('date_loaded',False,None,date_parser),
                          'Date Publicly Available': ('date_publicly_available',False,None,date_parser),
                          'Most Recent Update': ('date_updated',False,None,util.date_converter),
                          'Is Restricted':('is_restricted',False,False) }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)
    
    rows = 0    
    libraries = {}
    for row in sheet:
        logger.debug(str(('row raw: ',row)))
        r = util.make_row(row)
        logger.debug(str(('row: ',r)))
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]
            
            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            library = Library(**initializer)
            library.save()
            logger.info(str(('library created', library)))
            libraries[library.short_name] = library
            rows += 1
        except Exception, e:
            logger.error(str(('library initializer problem: ', initializer)))
            raise e
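readLibraries passes shorthand definitions (a bare field name, or a tuple of up to four items) through util.fill_in_column_definitions before using them. A plausible sketch of that expansion, assuming the properties tuple names the tuple slots in order and that a 'column_label' entry is added for error messages (as the exception text above suggests):

def fill_in_column_definitions(properties, column_definitions):
    # Hypothetical sketch: normalize every definition into a dict keyed by
    # ('model_field', 'required', 'default', 'converter'), filling missing
    # slots with False/None and remembering the original column label.
    defaults = {'required': False, 'default': None, 'converter': None}
    filled = {}
    for label, definition in column_definitions.items():
        if not isinstance(definition, tuple):
            definition = (definition,)
        d = dict(defaults)
        d.update(dict(zip(properties, definition)))
        d['column_label'] = label
        filled[label] = d
    return filled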
Example 5
def main(path):
    """
    Read in the Protein
    """
    sheet_name = 'HMS-LINCS Kinases'
    labels = { 'PP_Name':'name', 
              'PP_LINCS_ID':'lincs_id', 
              'PP_UniProt_ID':'uniprot_id', 
              'PP_Alternate_Name':'alternate_name',
              'PP_Provider':'provider',
              'PP_Provider_Catalog_ID':'provider_catalog_id',
              'PP_Batch_ID':'batch_id', 
              'PP_Amino_Acid_Sequence':'amino_acid_sequence',
              'PP_Gene_Symbol':'gene_symbol', 
              'PP_Gene_ID':'gene_id',
              'PP_Protein_Source':'protein_source',
              'PP_Protein_Form':'protein_form', 
              'PP_Protein_Purity':'protein_purity', 
              'PP_Protein_Complex':'protein_complex', 
              'PP_Isoform':'isoform', 
              'PP_Protein_Type':'protein_type', 
              'PP_Source_Organism':'source_organism', 
              'PP_Reference':'reference'}
    
    converters = { 'lincs_id': lambda x: x[x.index('HMSL')+4:] }    
    
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default
    cols = {}
    # first put the label row in (it contains the worksheet column, and it's unique)
    print 'labels: ', sheet.labels
    for i,label in enumerate(sheet.labels):
        if label in labels:
            cols[i] = labels[label]
        else:
            print 'Note: column label not found:', label
            #raise
            
    rows = 0    
    i = 0
    
    print 'cols: ' , cols
    proteins = {}
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            if cols[i] in converters:
                value = converters[cols[i]](value)
            dict[cols[i]]= value
        try:
            protein = Protein(**dict)
            protein.save()
            rows += 1
        except Exception, e:
            print "Invalid Protein, name: ", r[0]
            raise
Example 6
def main(path):
    """
    Read in the Library and LibraryMapping sheets
    """
    libraries = readLibraries(path,'Library')
    
    labels = { 'Facility':'facility_id',
               'Salt':'sm_salt',
               'Batch':'facility_batch_id',
               'Plate':'plate',
               'Well':'well',
               'Library Name':'short_name',
               'Concentration': 'concentration',
               'Concentration Unit':'concentration_unit'
               }
    
    small_molecule_lookup = ('facility_id', 'sm_salt', 'facility_batch_id')
    sheet = iu.readtable([path, 'LibraryMapping'])
    
    #dict to map spreadsheet fields to terms
    cols = {}
    # first put the label row in (it contains the worksheet column, and it's unique)
    for i,label in enumerate(sheet.labels):
        if label in labels:
            cols[labels[label]] = i
        else:
            print 'Note: column label not found:', label    
    rows = 0
    for row in sheet:
        r = util.make_row(row)
        # small molecule
        dict = {}
        for field in small_molecule_lookup:
            dict[field] = util.convertdata(r[cols[field]],int)
        try:
            dict['facility_id'] = 'HMSL' + str(dict['facility_id']) # TODO: convert all hmsl id's to integers!!
            sm = SmallMolecule.objects.get(**dict)
        except Exception, e:
            print "Invalid small molecule identifiers: ", dict
            raise 
        short_name = r[cols['short_name']]
        if short_name not in libraries:
            print "Library not found: ", short_name
            raise
        lm = {}
        lm['concentration'] = util.convertdata(r[cols['concentration']],float)
        lm['concentration_unit'] = util.convertdata(r[cols['concentration_unit']],None)
        lm['plate'] = util.convertdata(r[cols['plate']], int)
        lm['well'] = r[cols['well']]
        lm['small_molecule'] = sm
        lm['library'] = libraries[short_name]
        lm = LibraryMapping(**lm)
        lm.save()
        rows += 1
Example 7
def readLibraries(path, sheetName):

    sheet = iu.readtable([path, sheetName
                          ])  # Note, skipping the header row by default
    # dict to map spreadsheet fields to the Library fields
    labels = {
        'Name': 'name',
        'ShortName': 'short_name',
        'Date First Plated': 'date_first_plated',
        'Date Data Received': 'date_data_received',
        'Date Loaded': 'date_loaded',
        'Date Publicly Available': 'date_publicly_available'
    }
    date_parser = lambda x: util.convertdata(x, date)
    converters = {
        'date_first_plated': date_parser,
        'date_loaded': date_parser,
        'date_data_received': date_parser,
        'date_publicly_available': date_parser
    }
    cols = {}
    # first put the label row in (it contains the worksheet column, and it's unique)
    for i, label in enumerate(sheet.labels):
        if label in labels:
            cols[i] = labels[label]
        else:
            print 'Note: column label not found:', label
            raise Exception('column label not found: %s' % label)

    rows = 0
    i = 0

    libraries = {}
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        for i, value in enumerate(r):
            if cols[i] in converters:
                value = converters[cols[i]](value)
            dict[cols[i]] = value
        try:
            print 'create library:', dict
            library = Library(**dict)
            library.save()
            libraries[library.short_name] = library
            rows += 1
        except Exception, e:
            print "Invalid Library, name: ", r[0]
            raise
Example 8
def readLibraries(path, sheetName):
    
    sheet = iu.readtable([path, sheetName]) # Note, skipping the header row by default
    # dict to map spreadsheet fields to the Library fields
    labels = { 'Name': 'name',
               'ShortName': 'short_name',
               'Date First Plated': 'date_first_plated',
               'Date Data Received':'date_data_received',
               'Date Loaded': 'date_loaded',
               'Date Publicly Available': 'date_publicly_available' }
    date_parser = lambda x : util.convertdata(x,date)
    converters = {'date_first_plated': date_parser,
                  'date_loaded': date_parser,
                  'date_data_received': date_parser,
                  'date_publicly_available': date_parser }
    cols = {}
    # first put the label row in (it contains the worksheet column, and it's unique)
    for i,label in enumerate(sheet.labels):
        if label in labels:
            cols[i] = labels[label]
        else:
            print 'Note: column label not found:', label
            raise Exception('column label not found: %s' % label)
            
    rows = 0    
    i = 0
    
    libraries = {}
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        for i,value in enumerate(r):
            if cols[i] in converters:
                value = converters[cols[i]](value)
            dict[cols[i]]= value
        try:
            print 'create library:', dict
            library = Library(**dict)
            library.save()
            libraries[library.short_name] = library
            rows += 1
        except Exception, e:
            print "Invalid Library, name: ", r[0]
            raise
Example 9
def readDataColumns(path):
    # Read in the DataColumn Sheet
    sheetname = 'Data Columns'
    dataColumnSheet = iu.readtable([path, sheetname])

    _fields = util.get_fields(DataColumn)
    _typelookup = dict((f.name, iu.totype(f)) for f in _fields)
    
    # TODO: Use the import_utils methods here
    # TODO: compare and combine this with the fieldinformation entity
    labels = {'Worksheet Column':'worksheet_column',
              'Display Order':'display_order',
              'Name':'name',
              'Data Type':'data_type',
              'Decimal Places':'precision',
              'Description':'description',
              'Replicate Number':'replicate',
              'Time point':'time_point', 
              'Assay readout type':'readout_type',
              'Comments':'comments'}

    # create an array of dict's, each dict defines a DataColumn    
    dataColumnDefinitions = []
    # first put the label row in (it contains the worksheet column, and it's unique)
    for v in dataColumnSheet.labels[1:]:
        dataColumnDefinitions.append({labels['Worksheet Column']:v})
    # now, for each row, create the appropriate dictionary entry in the dataColumnDefinitions
    for row in dataColumnSheet:
        rowAsUnicode = util.make_row(row)
        keyRead = rowAsUnicode[0]
        for i,cellText in enumerate(rowAsUnicode[1:]):
            for key,fieldName in labels.items():
                if re.match(key,keyRead,re.M|re.I): # if the row is one of the DataColumn fields, then add it to the dict
                    dataColumnDefinitions[i][fieldName] = util.convertdata(cellText,_typelookup.get(fieldName, None)) # Note: convert the data to the model field type
                else:
                    logger.debug(str(( '"Data Column definition not used: ', cellText)) ) 
                    pass
    logger.debug(str(("definitions: ", dataColumnDefinitions)) )
    
    return dataColumnDefinitions
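Unlike the other sheets, 'Data Columns' is transposed: each row names one DataColumn field in its first cell, and each subsequent column describes one DataColumn. A toy illustration of the shape the loop above produces (invented values, not from a real workbook):

# Sheet layout (toy example):
#   Worksheet Column | B       | C
#   Name             | IC50    | Compound Name
#   Data Type        | Numeric | String
#
# readDataColumns turns that into one definition dict per data column:
expected = [
    {'worksheet_column': 'B', 'name': 'IC50', 'data_type': 'Numeric'},
    {'worksheet_column': 'C', 'name': 'Compound Name', 'data_type': 'String'},
]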
Example 10
def readDataColumns(path):
    # Read in the DataColumn Sheet
    sheetname = 'Data Columns'
    dataColumnSheet = iu.readtable([path, sheetname])

    _fields = util.get_fields(DataColumn)
    _typelookup = dict((f.name, iu.totype(f)) for f in _fields)
    
    labels = {'Worksheet Column':'worksheet_column',
              'Name':'name',
              'Data Type':'data_type',
              'Decimal Places':'precision',
              'Description':'description',
              'Replicate Number':'replicate',
              'Time point':'time_point', 
              'Assay readout type':'readout_type',
              'Comments':'comments'}

    # create an array of dict's, each dict defines a DataColumn    
    dataColumnDefinitions = []
    # first put the label row in (it contains the worksheet column, and it's unique)
    for v in dataColumnSheet.labels[1:]:
        dataColumnDefinitions.append({labels['Worksheet Column']:v})
    # now, for each row, create the appropriate dictionary entry in the dataColumnDefinitions
    for row in dataColumnSheet:
        rowAsUnicode = util.make_row(row)
        keyRead = rowAsUnicode[0]
        for i,cellText in enumerate(rowAsUnicode[1:]):
            for key,fieldName in labels.items():
                if re.match(key,keyRead,re.M|re.I): # if the row is one of the DataColumn fields, then add it to the dict
                    dataColumnDefinitions[i][fieldName] = util.convertdata(cellText,_typelookup.get(fieldName, None)) # Note: convert the data to the model field type
                else:
                    pass
                    # print '"Data Column definition not used: ', cellText 
    print "definitions: ", dataColumnDefinitions
    
    return dataColumnDefinitions
Example 11
def main(path, do_precursors_only):
    """
    Read in the Cell
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1]) # allow for informational header row

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
        'CL_Name':('name',True),
        'CL_LINCS_ID':'lincs_id',
        'CL_Alternate_Name':'alternative_names',
        'CL_Alternate_ID':'alternative_id',
        'Precursor_Cell':'precursor_facility_batch_id',
        'CL_Organism':'organism',
        'CL_Organ':'organ',
        'CL_Tissue':'tissue',
        'CL_Cell_Type':'cell_type',
        'CL_Cell_Type_Detail':'cell_type_detail',
        'CL_Donor_Sex': 'donor_sex',
        'CL_Donor_Age': ('donor_age_years',False,None,lambda x:util.convertdata(x,int)),
        'CL_Donor_Ethnicity': 'donor_ethnicity',
        'CL_Donor_Health_Status': 'donor_health_status',
        'CL_Disease':'disease',
        'CL_Disease_Detail':'disease_detail',
        'CL_Production_Details': 'production_details',
        'CL_Genetic_Modification':'genetic_modification',
        'CL_Known_Mutations':'mutations_known',
        'CL_Mutation_Citations':'mutation_citations',
        'CL_Verification_Reference_Profile':'verification_reference_profile',
        'CL_Growth_Properties':'growth_properties',
        'CL_Recommended_Culture_Conditions':'recommended_culture_conditions',
        'CL_Relevant_Citations': 'relevant_citations',
        'Usage Note': 'usage_note',
        'CL_Reference_Source': 'reference_source',
        'Reference Source URL': 'reference_source_url',
        
        'Date Data Received':('date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels, all_sheet_columns_required=False)
            
    rows = 0    
    precursor_map = {}
    precursor_pattern = re.compile(r'HMSL(5\d{4})-(\d+)')
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']
            
            value = util.convertdata(value)
            if value is not None:
                if converter:
                    try:
                        value = converter(value)
                    except Exception:
                        logger.error('field parse error: %r, value: %r, row: %d',
                            properties['column_label'],value,rows+2)
                        raise 
            if value is None:
                if default is not None:
                    value = default
            if value is None and required:
                raise Exception('Field is required: %s, record: %d' 
                    % (properties['column_label'],rows))

            logger.debug('model_field: %r, value: %r' , model_field, value)
            initializer[model_field] = value
            
        precursor_facility_batch_id = initializer.pop('precursor_facility_batch_id')
        if precursor_facility_batch_id:
            match = precursor_pattern.match(precursor_facility_batch_id)
            if not match:
                raise Exception('Invalid precursor pattern: needs: %s: %r, row: %d'
                    % (precursor_pattern, initializer, rows))
            precursor_map[initializer['facility_id']] = (match.group(1),match.group(2))
        
        if not do_precursors_only:
            try:
                logger.info('initializer: %r', initializer)
                cell = Cell(**initializer)
                cell.save()
                logger.info(str(('cell created:', cell)))
    
                # create a default batch - 0
                CellBatch.objects.create(reagent=cell,batch_id=0)
                
            except Exception, e:
                print "Invalid Cell, name: ", r[0]
                raise e
        
        rows += 1
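Several of the optional columns above run through util.date_converter and util.bool_converter, which are also not part of these listings. Plausible minimal versions, assuming dates arrive as ISO 'YYYY-MM-DD' text (real Excel date cells would need xlrd's date handling instead):

import datetime

def date_converter(value):
    # Hypothetical sketch: accept an existing date, else parse ISO text.
    if isinstance(value, datetime.date):
        return value
    return datetime.datetime.strptime(str(value).strip(), '%Y-%m-%d').date()

def bool_converter(value):
    # Hypothetical sketch: a few truthy spellings map to True, the rest to False.
    return str(value).strip().lower() in ('1', 'true', 'yes')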
Example 12
def read_metadata(path):
    """
    Read in the DataSets, Datacolumns, and Data sheets.  In the Data sheet, rows
    are DataRecords, and columns are DataPoints
    """
    # Read in the DataSet
    sheetname = 'Meta'
    # Note, skipping the header row by default
    metaSheet = iu.readtable([path, sheetname]) 

    # Define the Column Names -> model fields mapping
    properties = ('model_field','required','default','converter')
    field_definitions = {'Lead Screener First': 'lead_screener_firstname',
              'Lead Screener Last': 'lead_screener_lastname',
              'Lead Screener Email': 'lead_screener_email',
              'Lab Head First': 'lab_head_firstname',
              'Lab Head Last': 'lab_head_lastname',
              'Lab Head Email': 'lab_head_email',
              'Title': 'title',
              'Facility ID': ('facility_id',True,None, 
                              lambda x: util.convertdata(x,int)),
              'Summary': 'summary',
              'Protocol': 'protocol',
              'References': 'protocol_references',
              'Date Data Received':('date_data_received',False,None,
                                    util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,
                                          util.date_converter),
              'Most Recent Update': ('date_updated',False,None,
                                      util.date_converter),
              'Is Restricted':('is_restricted',False,False,util.bool_converter),
              'Dataset Type':('dataset_type',False),
              'Bioassay':('bioassay',False),
              'Dataset Keywords':('dataset_keywords',False),
              'Usage Message':('usage_message',False),
              }
    
    sheet_labels = []
    for row in metaSheet:
        rowAsUnicode = util.make_row(row)
        sheet_labels.append(rowAsUnicode[0])

    # convert the definitions to fleshed out dict's, with strategies for 
    # optional, default and converter
    field_definitions = \
        util.fill_in_column_definitions(properties,field_definitions)
    # create a dict mapping the column/row ordinal to the proper definition dict
    cols = util.find_columns(field_definitions, sheet_labels,
                             all_column_definitions_required=False)

    
    initializer = {}
    for i,row in enumerate(metaSheet):
        rowAsUnicode = util.make_row(row)
        properties = cols[i]
        value = rowAsUnicode[1]
        
        logger.debug(str(('read col: ', i, ', ', properties)))
        required = properties['required']
        default = properties['default']
        converter = properties['converter']
        model_field = properties['model_field']

        # Todo, refactor to a method
        logger.debug(str(('raw value', value)))
        if(converter != None):
            value = converter(value)
        if(value == None ):
            if( default != None ):
                value = default
        if(value == None and  required == True):
            raise Exception('Field is required: %s, record: %d' % 
                            (properties['column_label'],row))
        logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
        initializer[model_field] = value

    return initializer 
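Most of these loaders, including this one, call util.find_columns to map the fleshed-out definitions onto the sheet's actual label order; the all_sheet_columns_required / all_column_definitions_required flags seen in the examples suggest it tolerates extra or missing columns on request. A sketch of that assumed behavior, returning {ordinal: definition dict}:

def find_columns(column_definitions, sheet_labels,
                 all_sheet_columns_required=True,
                 all_column_definitions_required=True):
    # Hypothetical sketch: match each sheet label to a definition key
    # case-insensitively and record its ordinal position.
    cols = {}
    matched = set()
    for i, label in enumerate(sheet_labels):
        for key, definition in column_definitions.items():
            if label and label.strip().lower() == key.strip().lower():
                cols[i] = definition
                matched.add(key)
                break
        else:
            if all_sheet_columns_required:
                raise Exception('sheet column not recognized: %r' % label)
    if all_column_definitions_required:
        missing = set(column_definitions) - matched
        if missing:
            raise Exception('expected columns not found in sheet: %r' % missing)
    return cols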
Example 13
def main(path):
    """
    Read in the smallmolecule batch info
    """
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row
                          ])  # Note, skipping the header row by default

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        # NOTE: even though these db fields are not integers, it is convenient
        # to convert the read-in values to int so they are not interpreted as floats
        'facility_id': ('facility_id', True, None, lambda x: util.convertdata(x, int)),
        'salt_id': ('salt_id', True, None, lambda x: util.convertdata(x, int)),
        'facility_batch_id': ('batch_id', True, None, lambda x: util.convertdata(x, int)),
        'provider': ('provider_name', True),
        'provider_catalog_id': 'provider_catalog_id',
        'provider_sample_id': 'provider_batch_id',
        'chemical_synthesis_reference': 'chemical_synthesis_reference',
        'purity': 'purity',
        'purity_method': 'purity_method',
        'aqueous_solubility': 'aqueous_solubility',
        # FIXME: should warn the user if no unit is provided when
        # aqueous_solubility is provided
        'aqueous_solubility_unit': 'aqueous_solubility_unit',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions,
                             sheet.labels,
                             all_sheet_columns_required=False)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id': None, 'salt_id': None}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))

            if (model_field in small_molecule_lookup):
                small_molecule_lookup[model_field] = value
                if (None not in small_molecule_lookup.values()):
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer['reagent'] = sm
                    except Exception, e:
                        logger.error(
                            str(('sm identifiers not found',
                                 small_molecule_lookup, 'row',
                                 rows + start_row + 2)))
                        raise
            else:
                initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(('smb created:', smb)))
            rows += 1
        except Exception, e:
            logger.error(
                str(("Invalid smallmolecule batch initializer: ", initializer,
                     'row', rows + start_row + 2, e)))
            raise
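A note on the small_molecule_lookup pattern above: facility_id and salt_id are collected as the row is scanned, and the SmallMolecule query is only issued once both are present, because neither value alone identifies a unique compound. Reduced to its core (invented identifiers, Django call commented out):

small_molecule_lookup = {'facility_id': None, 'salt_id': None}

small_molecule_lookup['facility_id'] = 10001
assert None in small_molecule_lookup.values()       # salt_id still missing: no query yet

small_molecule_lookup['salt_id'] = 101
assert None not in small_molecule_lookup.values()   # key complete, safe to resolve:
# sm = SmallMolecule.objects.get(**small_molecule_lookup)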
Example 14
def main(path):
    """
    Read in the Protein
    """
    sheet_name = 'HMS-LINCS Kinases'

    # Note, skipping the header row by default
    sheet = iu.readtable([path, sheet_name, 1]) 

    properties = ('model_field','required','default','converter')
    column_definitions = { 
            'PP_Name':('name',True), 
            'PP_LINCS_ID':('facility_id',True,None,lambda x: x[x.index('HMSL')+4:]), 
            'PP_UniProt_ID':'uniprot_id', 
            'PP_Alternate_Name':'alternative_names',
            'PP_Alternate_Name[2]':'alternate_name_2',
            'PP_Provider':'provider',
            'PP_Provider_Catalog_ID':'provider_catalog_id',
            'PP_Batch_ID':'batch_id', 
            'PP_Amino_Acid_Sequence':'amino_acid_sequence',
            'PP_Gene_Symbol':'gene_symbol', 
            'PP_Gene_ID':'gene_id',
            'PP_Protein_Source':'protein_source',
            'PP_Protein_Form':'protein_form', 
            'PP_Mutation':'mutation', 
            'PP_Phosphorylation_State':'phosphlorylation', 
            'PP_Domain':'protein_domain', 
            'PP_Protein_Purity':'protein_purity', 
            'PP_Protein_Complex':'protein_complex', 
            'PP_Isoform':'isoform', 
            'PP_Protein_Type':'protein_type', 
            'PP_Source_Organism':'source_organism', 
            'PP_Reference':'reference',
            'Date Data Received':('date_data_received',False,None,
                                  util.date_converter),
            'Date Loaded': ('date_loaded',False,None,util.date_converter),
            'Date Publicly Available': ('date_publicly_available',False,None,
                                        util.date_converter),
            'Most Recent Update': ('date_updated',False,None,util.date_converter),
            'Is Restricted':('is_restricted',False,False)}
    
    # convert the labels to fleshed out dict's, with strategies for optional, 
    # default and converter
    column_definitions = \
        util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' 
                                    % (properties['column_label'],rows))
            logger.debug(str((
                'model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            protein = Protein(**initializer)
            
            # FIXME: LINCS IDS for Protein
            protein.lincs_id = protein.facility_id
            
            protein.save()
            logger.info(str(('protein created: ', protein)))
            rows += 1
            
            # create a default batch - 0
            ProteinBatch.objects.create(reagent=protein,batch_id=0)
            
        except Exception, e:
            logger.error(str(("Invalid protein initializer: ", initializer, e)))
            raise
Example 15
def main(path):
    """
    Read in the Library and LibraryMapping sheets
    """
    libraries = readLibraries(path,'Library')
    
    sheet = iu.readtable([path, 'LibraryMapping'])
    properties = ('model_field','required','default','converter')
    column_definitions = {'Facility':('facility_id',False,None, lambda x: util.convertdata(x,int)),
                          'Salt':('salt_id',False,None, lambda x: util.convertdata(x,int)),
                          'Batch':('facility_batch_id',False,None, lambda x: util.convertdata(x,int)),
                          'Is Control':('is_control',False,False,util.bool_converter),
                          'Plate':('plate',False,None, lambda x: util.convertdata(x,int)),
                          'Well':'well',
                          'Library Name':'short_name',
                          'Concentration': 'concentration',
                          'Concentration Unit':'concentration_unit'
                          }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)
    
    small_molecule_batch_lookup = ('smallmolecule', 'facility_batch_id')
    library_mapping_lookup = ('smallmolecule_batch','library','is_control','plate','well','concentration','concentration_unit')
    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        current_row = rows + 2
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id':None, 'salt_id':None}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, row: %d' % (properties['column_label'],current_row))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            
            initializer[model_field] = value
            
            if(model_field in small_molecule_lookup):
                small_molecule_lookup[model_field]=value
                if( None not in small_molecule_lookup.values()):
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer['smallmolecule'] = sm
                    except Exception, e:
                        raise Exception(str(('sm facility id not found', small_molecule_lookup,e,'row',current_row)))
            elif(model_field == 'short_name'):
                try:
                    library = libraries[value]
                    initializer['library'] = library
                except Exception, e:
                    raise Exception(str(('library short_name not found', value,e,'row',current_row)))
Example 16
def main(import_file,file_directory,deploy_dir):
    """
    Read in the qc events for batches 
    - version 1 - for small molecule batches
    """
    sheet_name = 'Sheet1'
    start_row = 0
    sheet = iu.readtable([import_file, sheet_name, start_row]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = { 
              'facility_id': ('facility_id_for',True,None, lambda x: util.convertdata(x,int)),
              'salt_id': ('salt_id_for',False,None, lambda x: util.convertdata(x,int)),
              'batch_id':('batch_id_for',True,None, lambda x: util.convertdata(x,int)),
              'QC event date': ('date',True,None,util.date_converter),
              'outcome': ('outcome',True),
              'comment': 'comment',
              'is_restricted':('is_restricted',False,False,util.bool_converter),
              'file1': 'file1',
              'file2': 'file2',
              'file3': 'file3',
              'file4': 'file4',
              'file5': 'file5',
              }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)
    
    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        # store each row in a dict
        _dict = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            _dict[model_field] = value

        logger.debug(str(('dict: ', _dict)))
        
        files_to_attach = []
        for i in range(10):
            filenameProp = 'file%s' % i
            if _dict.get(filenameProp, None):
                fileprop = _dict[filenameProp]
                filepath = os.path.join(file_directory,fileprop)
                if not os.path.exists(filepath):
                    raise Exception(str(('file does not exist:',filepath,'row',
                        rows+start_row)))
                filename = os.path.basename(filepath)
                relative_path = fileprop[:fileprop.index(filename)]
                
                # Move the file
                dest_dir = deploy_dir
                if not dest_dir:
                    dest_dir = settings.STATIC_AUTHENTICATED_FILE_DIR
                if not os.path.isdir(dest_dir):
                    raise Exception(str(('no such deploy directory, please create it', dest_dir)))
                if relative_path:
                    dest_dir = os.path.join(dest_dir, relative_path)
                    if not os.path.exists(dest_dir):
                        os.makedirs(dest_dir)
                deployed_path = os.path.join(dest_dir, filename)
                    
                logger.debug(str(('deploy',filepath, deployed_path)))
                if os.path.exists(deployed_path):
                    os.remove(deployed_path)
                copy(filepath,deployed_path)
                if not os.path.isfile (deployed_path):
                    raise Exception(str(('could not deploy to', deployed_path)))
                else:
                    logger.debug(str(('successfully deployed to', deployed_path)))
                
                files_to_attach.append((filename,relative_path))
        
        initializer = None
        try:
            # create the qc record
            initializer = {key:_dict[key] for key in 
                ['facility_id_for','salt_id_for','batch_id_for','outcome','comment','date']}
            qc_event = QCEvent(**initializer)
            qc_event.save()
            logger.debug(str(('saved', qc_event)))
            
            # create attached file records
            for (filename,relative_path) in files_to_attach:
                initializer = {
                    'qc_event':qc_event,
                    'filename':filename,
                    'relative_path':relative_path,
                    'is_restricted':_dict['is_restricted']
                    }
                qc_attached_file = QCAttachedFile(**initializer)
                qc_attached_file.save()
                logger.debug(str(('created qc attached file', qc_attached_file)))
            
            rows += 1
            
        except Exception, e:
            logger.error(str(("Invalid initializer: ", initializer, 'row', 
                rows+start_row+2, e)))
            raise
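The deployment step above preserves any sub-directory given in the spreadsheet cell: relative_path keeps everything before the basename, so the same folder structure is re-created under the deploy directory. A quick illustration with an invented path:

import os

fileprop = 'qc/2013/report.pdf'                       # invented example cell value
filename = os.path.basename(fileprop)                 # 'report.pdf'
relative_path = fileprop[:fileprop.index(filename)]   # 'qc/2013/'

assert (filename, relative_path) == ('report.pdf', 'qc/2013/')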
Example 17
def main(path):
    """
    Read in the primary cell batch info
    """
    sheet_name = "Sheet1"
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])  # Note, skipping the header row by default

    properties = ("model_field", "required", "default", "converter")
    column_definitions = {
        "Facility ID": ("facility_id", True, None, lambda x: x[x.index("HMSL") + 4 :]),
        "PC_Center_Batch_ID": ("batch_id", True, None, lambda x: util.convertdata(x, int)),
        "PC_Center_Specific_Code": "center_specific_code",
        "PC_Provider_Name": "provider_name",
        "PC_Provider_Catalog_ID": "provider_catalog_id",
        "PC_Provider_Batch_ID": "provider_batch_id",
        "PC_Source_Information": "source_information",
        "PC_Date_Received": "date_received",
        "PC_Quality_Verification": "quality_verification",
        "PC_Culture_Conditions": "culture_conditions",
        "PC_Passage_Number": ("passage_number", False, None, lambda x: util.convertdata(x, int)),
        "PC_Transient_Modification": "transient_modification",
        "Date Data Received": ("date_data_received", False, None, util.date_converter),
        "Date Loaded": ("date_loaded", False, None, util.date_converter),
        "Date Publicly Available": ("date_publicly_available", False, None, util.date_converter),
        "Most Recent Update": ("date_updated", False, None, util.date_converter),
    }

    column_definitions = util.fill_in_column_definitions(properties, column_definitions)
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    for row in sheet:

        r = util.make_row(row)
        initializer = {}

        for i, value in enumerate(r):

            if i not in cols:
                continue
            properties = cols[i]

            required = properties["required"]
            default = properties["default"]
            converter = properties["converter"]
            model_field = properties["model_field"]

            if converter != None:
                value = converter(value)
            if value == None:
                if default != None:
                    value = default
            if value == None and required == True:
                raise Exception("Field is required: %s, record: %d" % (properties["column_label"], rows))

            if model_field == "facility_id":
                try:
                    cell = PrimaryCell.objects.get(facility_id=value)
                    initializer["reagent"] = cell
                except:
                    logger.exception("Primary Cell not found: %r, row: %d", value, rows + start_row + 1)
                    raise
            else:
                initializer[model_field] = value
        try:
            logger.debug("initializer: %r", initializer)
            cell = PrimaryCellBatch(**initializer)
            cell.save()
            logger.debug("primary cell batch created: %r", cell)
            rows += 1
        except Exception, e:
            logger.exception("Invalid Primary CellBatch initializer: %r, row: %d", initializer, rows + start_row + 1)
            raise
Example 18
def main(path):
    """
    Read in the smallmolecule batch info
    """
    sheet_name = "sheet 1"
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])  # Note, skipping the header row by default

    properties = ("model_field", "required", "default", "converter")
    column_definitions = {
        # NOTE: even though these db fields are not integers, it is convenient
        # to convert the read-in values to int so they are not interpreted as floats
        "facility_id": ("facility_id", True, None, lambda x: util.convertdata(x, int)),
        "salt_id": ("salt_id", True, None, lambda x: util.convertdata(x, int)),
        "facility_batch_id": ("facility_batch_id", True, None, lambda x: util.convertdata(x, int)),
        "provider": ("provider", True),
        "provider_catalog_id": "provider_catalog_id",
        "provider_sample_id": "provider_sample_id",
        "chemical_synthesis_reference": "chemical_synthesis_reference",
        "purity": "purity",
        "purity_method": "purity_method",
        "aqueous_solubility": "aqueous_solubility",
        "aqueous_solubility_unit": "aqueous_solubility_unit",
        "Date Data Received": ("date_data_received", False, None, util.date_converter),
        "Date Loaded": ("date_loaded", False, None, util.date_converter),
        "Date Publicly Available": ("date_publicly_available", False, None, util.date_converter),
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(("cols: ", cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {"facility_id": None, "salt_id": None}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            properties = cols[i]

            logger.debug(str(("read col: ", i, ", ", properties)))
            required = properties["required"]
            default = properties["default"]
            converter = properties["converter"]
            model_field = properties["model_field"]

            # Todo, refactor to a method
            logger.debug(str(("raw value", value)))
            if converter != None:
                value = converter(value)
            if value == None:
                if default != None:
                    value = default
            if value == None and required == True:
                raise Exception("Field is required: %s, record: %d" % (properties["column_label"], rows))
            logger.debug(str(("model_field: ", model_field, ", value: ", value)))

            if model_field in small_molecule_lookup:
                small_molecule_lookup[model_field] = value
                if None not in small_molecule_lookup.values():
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer["smallmolecule"] = sm
                    except Exception, e:
                        logger.error(
                            str(("sm identifiers not found", small_molecule_lookup, "row", rows + start_row + 2))
                        )
                        raise
            else:
                initializer[model_field] = value
        try:
            logger.debug(str(("initializer: ", initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(("smb created:", smb)))
            rows += 1
        except Exception, e:
            logger.error(
                str(("Invalid smallmolecule batch initializer: ", initializer, "row", rows + start_row + 2, e))
            )
            raise
Example 19
def main(path):
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])

    properties = ('model_field','required','default','converter')
    column_definitions = { 
        'facility_id': (
            'facility_id',True,None, lambda x: util.convertdata(x,int)),
        'salt_id': (
            'salt_id',True,None, lambda x: util.convertdata(x,int)),
        'facility_batch_id':(
            'batch_id',True,None, lambda x: util.convertdata(x,int)),
        'provider': ('provider_name',False),
        'provider_catalog_id':'provider_catalog_id',
        'provider_sample_id':'provider_batch_id',
        'molecular_weight':(
            '_molecular_weight',False,None, 
            lambda x: util.convertdata(x, float)),
        'molecular_formula':'_molecular_formula',
        'chemical_synthesis_reference':'_chemical_synthesis_reference',
        'purity':'_purity',
        'purity_method':'_purity_method',
        'aqueous_solubility':'aqueous_solubility',
        # FIXME: should warn the user if no unit is provided when 
        # aqueous_solubility is provided
        'aqueous_solubility_unit':'aqueous_solubility_unit',    
        'Date Data Received':(
            'date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': (
            'date_updated',False,None,util.date_converter),
        }
    column_definitions = util.fill_in_column_definitions(
        properties,column_definitions)
    
    cols = util.find_columns(column_definitions, sheet.labels,
        all_sheet_columns_required=False)
    
    rows = 0    
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id':None, 'salt_id':None}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception(
                    'Field is required: %s, record: %d' 
                    % (properties['column_label'],rows))
            
            if(model_field in small_molecule_lookup):
                small_molecule_lookup[model_field]=value
                if( None not in small_molecule_lookup.values()):
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer['reagent'] = sm
                    except Exception, e:
                        logger.exception(
                            'sm identifiers not found: %r, row: %d', 
                            small_molecule_lookup,rows+start_row+2)
                        raise
            else:
                initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(('smb created:', smb)))
            rows += 1
        except Exception, e:
            logger.exception(
                'Invalid smallmolecule batch initializer: %r, row: %d', 
                initializer, rows+start_row+2)
            raise
Example 20
def main(path):
    """
    Read in the Library and LibraryMapping sheets
    """
    libraries = readLibraries(path, 'Library')

    sheet = iu.readtable([path, 'LibraryMapping'])
    properties = ('model_field', 'required', 'default', 'converter')
    date_parser = lambda x: util.convertdata(x, date)
    column_definitions = {
        'Facility': ('facility_id', False, None, lambda x: util.convertdata(x, int)),
        'Salt': ('salt_id', False, None, lambda x: util.convertdata(x, int)),
        'Batch': ('batch_id', False, None, lambda x: util.convertdata(x, int)),
        'Is Control': ('is_control', False, False, util.bool_converter),
        'Plate': ('plate', False, None, lambda x: util.convertdata(x, int)),
        'Well': 'well',
        'Library Name': 'short_name',
        'Concentration': 'concentration',
        'Concentration Unit': 'concentration_unit'
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    small_molecule_batch_lookup = ('reagent', 'batch_id')
    library_mapping_lookup = ('smallmolecule_batch', 'library', 'is_control',
                              'plate', 'well', 'concentration',
                              'concentration_unit')
    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        current_row = rows + 2
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id': None, 'salt_id': None}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception(
                    'Field is required: %s, row: %d' %
                    (properties['column_label'], current_row))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))

            initializer[model_field] = value

            if (model_field in small_molecule_lookup):
                small_molecule_lookup[model_field] = value
                if (None not in small_molecule_lookup.values()):
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer['reagent'] = sm
                    except Exception, e:
                        raise Exception(
                            str(('sm facility id not found',
                                 small_molecule_lookup, e, 'row',
                                 current_row)))
            elif (model_field == 'short_name'):
                try:
                    library = libraries[value]
                    initializer['library'] = library
                except Exception, e:
                    raise Exception(
                        str(('library short_name not found', value, e, 'row',
                             current_row)))
Esempio n. 21
0
def readLibraries(path, sheetName):

    sheet = iu.readtable([path, sheetName
                          ])  # Note, skipping the header row by default
    # dict to map spreadsheet fields to the Library fields
    properties = ('model_field', 'required', 'default', 'converter')
    date_parser = lambda x: util.convertdata(x, date)
    column_definitions = {
        'Name': ('name', True),  # TODO use the model to determine if req'd
        'ShortName': ('short_name', True),
        'Library Type':
        'type',
        'Date First Plated': ('date_first_plated', False, None, date_parser),
        'Date Data Received': ('date_data_received', False, None, date_parser),
        'Date Loaded': ('date_loaded', False, None, date_parser),
        'Date Publicly Available':
        ('date_publicly_available', False, None, date_parser),
        'Most Recent Update':
        ('date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False)
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    libraries = {}
    for row in sheet:
        logger.debug(str(('row raw: ', row)))
        r = util.make_row(row)
        logger.debug(str(('row: ', r)))
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            library = Library(**initializer)
            library.save()
            logger.info(str(('library created', library)))
            libraries[library.short_name] = library
            rows += 1
        except Exception, e:
            logger.error(str(('library initializer problem: ', initializer)))
            raise e
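readLibraries, like the other loaders, relies on util.find_columns to map each recognized worksheet label to its column ordinal so the row loop can index definitions by position. A plausible sketch, assuming simple case-insensitive matching of labels against the definition keys and an optional all_sheet_columns_required flag (both assumptions; the project's helper may behave differently, e.g. by consulting the fieldinformation entity):

def find_columns(column_definitions, sheet_labels,
                 all_sheet_columns_required=True):
    """
    Map each matched sheet-column ordinal to its column definition dict.
    Sketch only; assumes case-insensitive label matching.
    """
    by_label = dict((label.strip().lower(), defn)
                    for label, defn in column_definitions.items())
    cols = {}
    for i, label in enumerate(sheet_labels):
        key = label.strip().lower() if hasattr(label, 'strip') else label
        if key in by_label:
            cols[i] = by_label[key]
        elif all_sheet_columns_required:
            raise Exception('unrecognized sheet column: %r' % label)
    return cols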
Esempio n. 22
0
def main(path):
    """
    Read in the Antibody Batches
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1]) 

    properties = ('model_field','required','default','converter')
    column_definitions = { 
              'AR_Center_Specific_ID': ('antibody_facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
              'AR_Center_Batch_ID': ('batch_id',True,None,lambda x:util.convertdata(x,int)),
              'AR_Center_Name': 'center_name',
              'AR_Provider_Name': 'provider_name',
              'AR_Provider_Catalog_ ID': 'provider_catalog_id',
              'AR_Provider_Batch_ID': 'provider_batch_id',
              'AR_Antibody_Purity': 'antibody_purity',

              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              }
              
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0    
    logger.debug('cols: %s' % cols)
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug('read col: %d: %s' % (i,properties))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug('raw value %r' % value)
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' 
                    % (properties['column_label'],rows))

            logger.debug('model_field: %s, converted value %r'
                % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('initializer: %s' % initializer)
            
            antibody_facility_id = initializer.pop('antibody_facility_id',None)
            if antibody_facility_id: 
                try:
                    antibody = Antibody.objects.get(facility_id=antibody_facility_id)
                    initializer['reagent'] = antibody
                except ObjectDoesNotExist, e:
                    logger.error('AR_Center_Specific_ID: "%s" does not exist, row: %d' 
                        % (antibody_facility_id,rows))
            antibody_batch = AntibodyBatch(**initializer)
            antibody_batch.save()
            logger.info('antibody batch created: %s' % antibody_batch)
            rows += 1
        except Exception, e:
            logger.error("Invalid antibody_batch initializer: %s" % initializer)
            raise
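Several loaders strip the 'HMSL' prefix from center-specific IDs with the inline converter lambda x: x[x.index('HMSL')+4:], which raises ValueError when the prefix is absent. A slightly more forgiving variant, shown only to illustrate what the converter does (not the converter actually registered above):

def strip_hmsl_prefix(value):
    """
    Return the part of a center-specific ID after the 'HMSL' prefix,
    e.g. 'HMSL10001' -> '10001'.  Unlike the inline lambda, values without
    the prefix are returned unchanged instead of raising ValueError.
    Illustrative sketch only.
    """
    if value and 'HMSL' in value:
        return value[value.index('HMSL') + 4:]
    return value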
Esempio n. 23
0
def main(path):
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1]) 

    properties = ('model_field','required','default','converter')
    column_definitions = { 
        'AR_Name': ('name',True),
        'AR_LINCS_ID': 'lincs_id', 
        'AR_Alternative_Name': 'alternative_names',
        'AR_Alternative_ID': 'alternative_id',
        'AR_Center_Canonical_ID': (
            'facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
        'AR_Clone_Name': 'clone_name',
        'AR_RRID': 'rrid',
        'AR_Antibody_Type': 'type',
        'target_protein_center_ids': 'target_protein_center_ids',
        'AR_Non-Protein_Target': 'non_protein_target_name',
        'AR_Target_Organism': 'target_organism',
        'other_target_information': 'other_target_information',    
        'other_human_target_protein_center_ids': 
            'other_human_target_protein_center_ids',
        'AR_Immunogen': 'immunogen',
        'AR_Immunogen_Sequence': 'immunogen_sequence',
        'AR_Antibody_Species': 'species',
        'AR_Antibody_Clonality': 'clonality',
        'AR_Antibody_Isotype': 'isotype',
        'AR_Antibody_Production_Source_Organism': 'source_organism',
        'AR_Antibody_Production_Details': 'production_details',
        'AR_Antibody_Labeling': 'labeling',
        'AR_Antibody_Labeling_Details': 'labeling_details',
        'AR_Relevant_Citations': 'relevant_citations',
        
        'Date Data Received':(
            'date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': ('date_updated',False,None,util.date_converter),
        'Is Restricted':('is_restricted',False,False,util.bool_converter)}
              
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    cols = util.find_columns(column_definitions, sheet.labels, 
        all_sheet_columns_required=False)

    rows = 0    
    logger.debug('cols: %s' % cols)
    for row in sheet:
        logger.debug('row %s - %s' %(rows,row))
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug('read col: %d: %s' % (i,properties))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug('raw value %r' % value)
            if(value == None or value == 'None'):
                value = None
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' 
                    % (properties['column_label'],rows))
            if(value and converter != None):
                value = converter(value)

            logger.debug('model_field: %s, converted value %r'
                % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('row: %s, initializer: %s' % (rows,initializer))
            
            target_protein_center_ids = initializer.pop(
                'target_protein_center_ids',None)
            other_human_target_protein_center_ids = initializer.pop(
                'other_human_target_protein_center_ids',None)

            antibody = Antibody.objects.create(**initializer)
            
            if target_protein_center_ids: 
                ids = [x for x in target_protein_center_ids.split(';')]
                try:
                    target_proteins = []
                    for id in ids:
                        id = id[id.index('HMSL')+4:]
                        target_proteins.append(
                            Protein.objects.get(facility_id=id))
                    antibody.target_proteins = target_proteins
                except ObjectDoesNotExist, e:
                    logger.error(
                        'target_protein_center_ids "%s" does not exist, row: %d' 
                        % (id,rows))
                    raise
            if other_human_target_protein_center_ids: 
                ids = [x for x in 
                    other_human_target_protein_center_ids.split(';')]
                try:
                    other_target_proteins = []
                    for id in ids:
                        id = id[id.index('HMSL')+4:]
                        other_target_proteins.append(
                            Protein.objects.get(facility_id=id))
                    antibody.other_human_target_proteins = other_target_proteins
                except ObjectDoesNotExist, e:
                    logger.error(
                        'other_human_target_protein_center_ids "%s"'
                        ' does not exist, row: %d' 
                        % (id,rows))
                    raise

            antibody.save()
            logger.info('antibody created: %s' % antibody)
            rows += 1

            # create a default batch - 0
            AntibodyBatch.objects.create(reagent=antibody,batch_id=0)
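The target_protein_center_ids handling above splits a semicolon-separated list of 'HMSL…' IDs and resolves each one to a Protein before assigning the relation. The same steps could be collected in one helper; a sketch that mirrors the inline logic (assuming, as the code above does, that Protein.facility_id stores the ID with the 'HMSL' prefix removed):

def resolve_protein_center_ids(id_list_string):
    """
    Split a value such as 'HMSL10001;HMSL10002' and return the matching
    Protein instances.  Raises ObjectDoesNotExist for unknown IDs, as the
    inline code above does.  Sketch only.
    """
    proteins = []
    for raw_id in id_list_string.split(';'):
        facility_id = raw_id.strip()
        facility_id = facility_id[facility_id.index('HMSL') + 4:]
        proteins.append(Protein.objects.get(facility_id=facility_id))
    return proteins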
Esempio n. 24
0
def main(path):
    """
    Read in the Antibody
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = { 
              'AR_Name': ('name',True),
              'AR_LINCS_ID': 'lincs_id', 
              'AR_Alternative_Name': 'alternative_names',
              'AR_Center_ID': ('facility_id', True),
              'AR_Target_Protein': 'target_protein_name',
              'AR_Target_Protein_ID': 'target_protein_uniprot_id',
              'AR_Target_Gene': 'target_gene_name',
              'AR_Target_Gene_ID': 'target_gene_id',
              'AR_Target_Organism': 'target_organism',
              'AR_Immunogen': 'immunogen',
              'AR_Immunogen_Sequence': 'immunogen_sequence',
              'AR_AntibodyClonality': 'antibody_clonality',
              'AR_Source_Organism': 'source_organism',
              'AR_Antibody_Isotype': 'antibody_isotype',
              'AR_Engineering': 'engineering',
              'AR_Antibody_Purity': 'antibody_purity',
              'AR_Antibody_Labeling': 'antibody_labeling',
              'AR_Recommended_Experiment_Type': 'recommended_experiment_type',
              'AR_Relevant_Reference': 'relevant_reference',
              'AR_Specificity': 'specificity',
              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              'Is Restricted':('is_restricted',False,False)}

              
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            antibody = Antibody(**initializer)
            antibody.save()
            logger.info(str(('antibody created: ', antibody)))
            rows += 1
        except Exception, e:
            logger.error(str(( "Invalid antibody initializer: ", initializer)))
            raise
Esempio n. 25
0
def main(path):
    """
    Read in the Cell
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = {
              'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
              'CL_Name':('name',True),
              'CL_LINCS_ID':'lincs_id',
              'CL_Alternate_Name':'alternative_names',
              'CL_Alternate_ID':'alternate_id',
              'CL_Center_Specific_ID':'center_specific_id',
              'MGH_ID':('mgh_id',False,None,lambda x:util.convertdata(x,int)),
              'Assay':'assay',
              'CL_Organism':'organism',
              'CL_Organ':'organ',
              'CL_Tissue':'tissue',
              'CL_Cell_Type':'cell_type',
              'CL_Cell_Type_Detail':'cell_type_detail',
              'CL_Donor_Sex': 'donor_sex',
              'CL_Donor_Age': ('donor_age_years',False,None,lambda x:util.convertdata(x,int)),
              'CL_Donor_Ethnicity': 'donor_ethnicity',
              'CL_Donor_Health_Status': 'donor_health_status',
              'CL_Disease':'disease',
              'CL_Disease_Detail':'disease_detail',
              'CL_Growth_Properties':'growth_properties',
              'CL_Genetic_Modification':'genetic_modification',
              'CL_Related_Projects':'related_projects',
              'CL_Recommended_Culture_Conditions':'recommended_culture_conditions',
              'CL_Verification_Reference_Profile':'verification_reference_profile',
              'CL_Known_Mutations':'mutations_known',
              'CL_Mutations_Citations':'mutations_citations',
              'CL_Molecular_Features': 'molecular_features',
              'CL_Relevant_Citations': 'relevant_citations',
              'CL_Reference_Source': 'reference_source',
              'CL_Reference_Source_ID': 'reference_source_id',
              'Reference Source URL': 'reference_source_url',
              'Usage Note': 'usage_note',
              
              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              'Is Restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels, all_sheet_columns_required=False)
            
    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = Cell(**initializer)
            cell.save()
            logger.info(str(('cell created:', cell)))
            rows += 1

            # create a default batch - 0
            CellBatch.objects.create(reagent=cell,batch_id=0)
            
        except Exception, e:
            print "Invalid Cell, name: ", r[0]
            raise e
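Every raw cell in these loaders passes through util.convertdata, and the date and boolean converters are built on top of it. A minimal sketch of that conversion strategy, assuming empty strings and the literal string 'None' count as missing and dates arrive as 'YYYY-MM-DD' text (the real util module may also handle Excel serial dates and other spellings):

from datetime import datetime

def convertdata(value, converter=None):
    """Treat None, '', and 'None' as missing; otherwise apply converter."""
    if value is None:
        return None
    if isinstance(value, basestring) and value.strip() in ('', 'None'):
        return None
    return converter(value) if converter else value

def date_converter(value):
    """Parse 'YYYY-MM-DD' text into a date (format assumed; sketch only)."""
    value = convertdata(value)
    return datetime.strptime(value, '%Y-%m-%d').date() if value else None

def bool_converter(value):
    """Map common truthy spellings to True (spellings assumed; sketch only)."""
    value = convertdata(value)
    return str(value).strip().lower() in ('1', 'true', 'yes', 'x')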
Esempio n. 26
0
def main(path):
    """
    Read in the smallmolecule batch info
    """
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = { 
              # NOTE: even though these db fields are not integers, it is 
              # convenient to convert the values read in to int to make sure 
              # they are not interpreted as float values
              'facility_id': ('facility_id',True,None, lambda x: util.convertdata(x,int)),
              'salt_id': ('salt_id',True,None, lambda x: util.convertdata(x,int)),
              'facility_batch_id':('batch_id',True,None, lambda x: util.convertdata(x,int)),
              'provider': ('provider_name',True),
              'provider_catalog_id':'provider_catalog_id',
              'provider_sample_id':'provider_batch_id',
              'chemical_synthesis_reference':'chemical_synthesis_reference',
              'purity':'purity',
              'purity_method':'purity_method',
              'aqueous_solubility':'aqueous_solubility',
              # FIXME: should warn the user if no unit is provided when 
              # aqueous_solubility is provided
              'aqueous_solubility_unit':'aqueous_solubility_unit',    
              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels,
        all_sheet_columns_required=False)
    
    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        small_molecule_lookup = {'facility_id':None, 'salt_id':None}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            
            if(model_field in small_molecule_lookup):
                small_molecule_lookup[model_field]=value
                if( None not in small_molecule_lookup.values()):
                    try:
                        sm = SmallMolecule.objects.get(**small_molecule_lookup)
                        initializer['reagent'] = sm
                    except Exception, e:
                        logger.error(str(('sm identifiers not found', small_molecule_lookup,'row',rows+start_row+2)))
                        raise
            else:
                initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            smb = SmallMoleculeBatch(**initializer)
            smb.save()
            logger.debug(str(('smb created:', smb)))
            rows += 1
        except Exception, e:
            logger.error(str(( "Invalid smallmolecule batch initializer: ", initializer, 'row', rows+start_row+2, e)))
            raise
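The FIXME in the column definitions above notes that the loader should warn when aqueous_solubility is supplied without a unit. One way to do that, as a sketch to be called once a row's initializer dict is complete (field names are taken from the definitions above; the check itself is not part of the original code):

def warn_if_unitless_solubility(initializer, row_number):
    """
    Log a warning when aqueous_solubility is present but
    aqueous_solubility_unit is not (addresses the FIXME above; sketch only).
    """
    if (initializer.get('aqueous_solubility')
            and not initializer.get('aqueous_solubility_unit')):
        logger.warning(
            'row %d: aqueous_solubility given without a unit: %r',
            row_number, initializer['aqueous_solubility'])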
Esempio n. 27
0
def main(path):
    """
    Read in the Antibody
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 0]) 

    properties = ('model_field','required','default','converter')
    column_definitions = { 
              'AR_Name': ('name',True),
              'AR_LINCS_ID': 'lincs_id', 
              'AR_Alternative_Name': 'alternative_names',
              'AR_Center_Specific_ID': ('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
              'AR_Clone_Name': 'clone_name',
              'AR_RRID': 'rrid',
              'AR_Antibody_Type': 'type',
              'target_protein_lincs_id': (
                  'target_protein_lincs_id',False,None, 
                  lambda x: x[x.index('HMSL')+4:] if x else None ),
              'AR_Non-Protein_Target': 'non_protein_target_name',
              'AR_Target_Organism': 'target_organism',
              'AR_Immunogen': 'immunogen',
              'AR_Immunogen_Sequence': 'immunogen_sequence',
              'AR_Antibody_Species': 'species',
              'AR_Antibody_Clonality': 'clonality',
              'AR_Antibody_Isotype': 'isotype',
              'AR_Antibody_Production_Source_Organism': 'source_organism',
              'AR_Antibody_Production_Details': 'production_details',
              'AR_Antibody_Labeling': 'labeling',
              'AR_Antibody_Labeling_Details': 'labeling_details',
              'AR_Relevant_Citations': 'relevant_citations',

              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              'Is Restricted':('is_restricted',False,False,util.bool_converter)}
              
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0    
    logger.debug('cols: %s' % cols)
    for row in sheet:
        logger.debug('row %s - %s' %(rows,row))
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug('read col: %d: %s' % (i,properties))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug('raw value %r' % value)
            if(value == None or value == 'None'):
                value = None
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' 
                    % (properties['column_label'],rows))
            if(value and converter != None):
                value = converter(value)

            logger.debug('model_field: %s, converted value %r'
                % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('row: %s, initializer: %s' % (rows,initializer))
            
            target_protein_lincs_id = initializer.pop('target_protein_lincs_id',None)
            if target_protein_lincs_id: 
                try:
                    target_protein = Protein.objects.get(lincs_id=target_protein_lincs_id)
                    initializer['target_protein'] = target_protein
                except ObjectDoesNotExist, e:
                    logger.error('target_protein_lincs_id "%s" does not exist, row: %d' 
                        % (target_protein_lincs_id,rows))
            antibody = Antibody(**initializer)
            antibody.save()
            logger.info('antibody created: %s' % antibody)
            rows += 1

            # create a default batch - 0
            AntibodyBatch.objects.create(reagent=antibody,batch_id=0)
            
        except Exception, e:
            logger.error("Invalid antibody initializer: %s" % initializer)
            raise
Esempio n. 28
0
def main(path):
    """
    Read in the cell batch info
    """
    sheet_name = 'Sheet1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row
                          ])  # Note, skipping the header row by default

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'Facility ID':
        ('facility_id', True, None, lambda x: x[x.index('HMSL') + 4:]),
        'CL_Batch_ID':
        ('batch_id', True, None, lambda x: util.convertdata(x, int)),
        'CL_Provider_Name':
        'provider_name',
        'CL_Provider_Batch_ID':
        'provider_batch_id',
        'CL_Provider_Catalog_ID':
        'provider_catalog_id',
        'CL_Quality_Verification':
        'quality_verification',
        'CL_Transient_Modification':
        'transient_modification',
        'Date Data Received':
        ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available':
        ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None,
                               util.date_converter),
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug(str(('raw value', value)))
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))

            if model_field == 'facility_id':
                try:
                    cell = Cell.objects.get(facility_id=value)
                    initializer['reagent'] = cell
                except:
                    logger.error(
                        str(("Cell not found", value, 'row',
                             rows + start_row + 2)))
                    raise
            else:
                initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = CellBatch(**initializer)
            cell.save()
            logger.debug(str(('cell created:', cell)))
            rows += 1
        except Exception, e:
            logger.error(
                str(("Invalid CellBatch initializer: ", initializer, 'row',
                     rows + start_row + 2, e)))
            raise
Esempio n. 29
0
def main(path):
    """
    Read in the Cell
    """
    sheet_name = 'HMS-LINCS cell line metadata'
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = {
              'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
              'CL_Name':('name',True),
              'CL_ID':'cl_id',
              'CL_Alternate_Name':'alternate_name',
              'CL_Alternate_ID':'alternate_id',
              'CL_Center_Name':'center_name',
              'CL_Center_Specific_ID':'center_specific_id',
              'MGH_ID':('mgh_id',False,None,lambda x:util.convertdata(x,int)),
              'Assay':'assay',
              'CL_Provider_Name':'provider_name',
              'CL_Provider_Catalog_ID':'provider_catalog_id',
              'CL_Batch_ID':'batch_id',
              'CL_Organism':'organism',
              'CL_Organ':'organ',
              'CL_Tissue':'tissue',
              'CL_Cell_Type':'cell_type',
              'CL_Cell_Type_Detail':'cell_type_detail',
              'CL_Disease':'disease',
              'CL_Disease_Detail':'disease_detail',
              'CL_Growth_Properties':'growth_properties',
              'CL_Genetic_Modification':'genetic_modification',
              'CL_Related_Projects':'related_projects',
              'CL_Recommended_Culture_Conditions':'recommended_culture_conditions',
              'CL_Verification_Profile':'verification_profile',
              'CL_Verification_Reference_Profile':'verification_reference_profile',
              'CL_Mutations_Reference':'mutations_reference',
              'CL_Mutations_Explicit':'mutations_explicit',
              'CL_Organism_Gender':'organism_gender',
              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              'Is Restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)
            
    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value

        try:
            logger.debug(str(('initializer: ', initializer)))
            cell = Cell(**initializer)
            cell.save()
            logger.info(str(('cell created:', cell)))
            rows += 1
        except Exception, e:
            print "Invalid Cell, name: ", r[0]
            raise e
Esempio n. 30
0
 found=False
 for key,value in mappingColumnDict.items():
     if(value != -1): 
         found=True
 if(not found):
     raise Exception('at least one of: ' + str(mappingColumnDict.keys()) + 
                     ' must be defined and used in the Data sheet.')
 
 # Read the Datasheet, create DataPoint values for mapped column in each row
 logger.debug(str(('now read rows, save_interval:', save_interval)))
 loopStart = time.time()
 pointsSaved = 0
 rowsRead = 0
 for row in dataSheet:
     current_row = rowsRead+2
     r = util.make_row(row)
     dataRecord = DataRecord(dataset=dataset )
     map_column = mappingColumnDict['Small Molecule Batch']
     mapped = False
     if(map_column > -1):
         _read_small_molecule_batch(map_column,r,current_row,dataRecord)
     map_column = mappingColumnDict['Plate']
     if(map_column > -1):
         _read_plate_well(map_column,r,current_row, dataRecord)
     map_column = mappingColumnDict['Cell']
     if(map_column > -1):
         _read_cell(map_column,r,current_row,dataRecord)
     map_column = mappingColumnDict['Antibody']
     if(map_column > -1):
         _read_antibody(map_column,r,current_row,dataRecord)
     map_column = mappingColumnDict['OtherReagent']
Esempio n. 31
0
def main(path):
    """
    Read in the Protein
    """
    sheet_name = "HMS-LINCS Kinases"
    sheet = iu.readtable([path, sheet_name, 1])  # Note, skipping the header row by default

    properties = ("model_field", "required", "default", "converter")
    column_definitions = {
        "PP_Name": ("name", True),
        "PP_LINCS_ID": ("lincs_id", True, None, lambda x: x[x.index("HMSL") + 4 :]),
        "PP_UniProt_ID": "uniprot_id",
        "PP_Alternate_Name": "alternate_name",
        "PP_Alternate_Name[2]": "alternate_name_2",
        "PP_Provider": "provider",
        "PP_Provider_Catalog_ID": "provider_catalog_id",
        "PP_Batch_ID": "batch_id",
        "PP_Amino_Acid_Sequence": "amino_acid_sequence",
        "PP_Gene_Symbol": "gene_symbol",
        "PP_Gene_ID": "gene_id",
        "PP_Protein_Source": "protein_source",
        "PP_Protein_Form": "protein_form",
        "PP_Protein_Purity": "protein_purity",
        "PP_Protein_Complex": "protein_complex",
        "PP_Isoform": "isoform",
        "PP_Protein_Type": "protein_type",
        "PP_Source_Organism": "source_organism",
        "PP_Reference": "reference",
        "Date Data Received": ("date_data_received", False, None, util.date_converter),
        "Date Loaded": ("date_loaded", False, None, util.date_converter),
        "Date Publicly Available": ("date_publicly_available", False, None, util.date_converter),
        "Is Restricted": ("is_restricted", False, False),
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(("cols: ", cols)))
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            properties = cols[i]

            logger.debug(str(("read col: ", i, ", ", properties)))
            required = properties["required"]
            default = properties["default"]
            converter = properties["converter"]
            model_field = properties["model_field"]

            # Todo, refactor to a method
            logger.debug(str(("raw value", value)))
            if converter != None:
                value = converter(value)
            if value == None:
                if default != None:
                    value = default
            if value == None and required == True:
                raise Exception("Field is required: %s, record: %d" % (properties["column_label"], rows))
            logger.debug(str(("model_field: ", model_field, ", value: ", value)))
            initializer[model_field] = value
        try:
            logger.debug(str(("initializer: ", initializer)))
            protein = Protein(**initializer)
            protein.save()
            logger.info(str(("protein created: ", protein)))
            rows += 1
        except Exception, e:
            logger.error(str(("Invalid protein initializer: ", initializer)))
            raise
Esempio n. 32
0
def main(path):
    
    # read in the two columns of the meta sheet to a dict that defines a DataSet
    metadata = read_metadata(path)
    dataset = DataSet(**metadata)
    dataset.save()
    
    # read in the data columns sheet to an array of dict's, each dict defines a DataColumn    
    dataColumnDefinitions = readDataColumns(path)
    
    # now that the array of DataColumn dicts is created, use them to create the DataColumn instances
    dataColumns = {}
    for dc in dataColumnDefinitions:
        dc['dataset'] = dataset
        dataColumn = DataColumn(**dc)
        dataColumn.save()
        dataColumns[dataColumn.name] = dataColumn    

    # read the Data sheet
    sheetname = 'Data'
    dataSheet = iu.readtable([path, sheetname])
    
    # First, map the sheet column indices to the DataColumns that were created
    dataColumnList = {}
    metaColumnDict = {'Well':-1, 'Plate':-1, 'Control Type':-1} # meta columns contain forensic information
    mappingColumnDict = {'Small Molecule':-1, 'Cell':-1, 'Protein':-1} # what is being studied - at least one is required
    # NOTE: this scheme is matching based on the labels between the "Data Column" sheet and the "Data" sheet
    for i,label in enumerate(dataSheet.labels):
        if(label == 'None' or label == 'well_id' or label.strip()=='' or label == 'Exclude' ): continue  
        if label in metaColumnDict: 
            metaColumnDict[label] = i
            continue
        if label in mappingColumnDict: 
            mappingColumnDict[label] = i
            continue
        if label in dataColumns:
            dataColumnList[i] = dataColumns[label] # note here "i" is the index to the dict
            
        else:
            #raise Exception("no datacolumn for the label: " + label)
            columnName = chr(ord('A') + i)
            findError = True
            for column in dataColumns.values():
                if(column.worksheet_column == columnName):
                    dataColumnList[i] = column
                    findError = False
                    break
            if findError:    
                print "Error: no datacolumn for ", label
                sys.exit(-1)
    
    found=False
    for key,value in mappingColumnDict.items():
        if(value != -1): 
            found=True
    if(not found):
        raise Exception('at least one of: ' + str(mappingColumnDict.keys()) + ' must be defined and used in the Data sheet.')
    
    # Read in the Data sheet, create DataPoint values for mapped column in each row
    pointsSaved = 0
    rowsRead = 0
    for row in dataSheet:
        r = util.make_row(row)
        dataRecord = DataRecord(dataset=dataset )
        map_column = mappingColumnDict['Small Molecule']
        mapped = False
        if(map_column > -1):
            try:
                value = util.convertdata(r[map_column].strip())
                if(value != None and value != '' ):
                    facility = value.split("-")[0] # TODO: purge "HMSL" from the db
                    salt = value.split("-")[1]
                    dataRecord.small_molecule = SmallMolecule.objects.get(facility_id=facility, sm_salt=salt)
                    mapped = True
            except Exception, e:
                print "Invalid Small Molecule facility id: ", value
                raise    
        map_column = mappingColumnDict['Cell']
        if(map_column > -1):
            try:
                value = util.convertdata(r[map_column].strip())
                if(value != None and value != '' ):
                    facility_id = value
                    dataRecord.cell = Cell.objects.get(facility_id=facility_id) # TODO: purge "HMSL" from the db
                    mapped = True
            except Exception, e:
                print "Invalid Cell facility id: ", facility_id
                raise    
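The Small Molecule mapping column above encodes the facility and salt IDs as one dash-separated token (e.g. '10001-101'), which the loop splits before querying SmallMolecule. The same parsing and lookup, factored into a sketch that mirrors the inline code:

def lookup_small_molecule(value):
    """
    Split a 'facility-salt' token such as '10001-101' and fetch the
    matching SmallMolecule.  Raises an error when no dash is present or
    when the pair is unknown, like the inline code above.  Sketch only.
    """
    facility, salt = value.strip().split('-', 1)
    return SmallMolecule.objects.get(facility_id=facility, sm_salt=salt)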
Esempio n. 33
0
def main(path):
    """
    Read in the Protein
    """
    sheet_name = 'HMS-LINCS Kinases'

    # Note, skipping the header row by default
    sheet = iu.readtable([path, sheet_name, 1]) 

    properties = ('model_field','required','default','converter')
    column_definitions = { 
            'PP_Name':('name',True), 
            'PP_LINCS_ID':('lincs_id',True,None,lambda x: x[x.index('HMSL')+4:]), 
            'PP_UniProt_ID':'uniprot_id', 
            'PP_Alternate_Name':'alternate_name',
            'PP_Alternate_Name[2]':'alternate_name_2',
            'PP_Provider':'provider',
            'PP_Provider_Catalog_ID':'provider_catalog_id',
            'PP_Batch_ID':'batch_id', 
            'PP_Amino_Acid_Sequence':'amino_acid_sequence',
            'PP_Gene_Symbol':'gene_symbol', 
            'PP_Gene_ID':'gene_id',
            'PP_Protein_Source':'protein_source',
            'PP_Protein_Form':'protein_form', 
            'PP_Mutation':'mutation', 
            'PP_Phosphorylation_State':'phosphlorylation', 
            'PP_Domain':'protein_domain', 
            'PP_Protein_Purity':'protein_purity', 
            'PP_Protein_Complex':'protein_complex', 
            'PP_Isoform':'isoform', 
            'PP_Protein_Type':'protein_type', 
            'PP_Source_Organism':'source_organism', 
            'PP_Reference':'reference',
            'Date Data Received':('date_data_received',False,None,
                                  util.date_converter),
            'Date Loaded': ('date_loaded',False,None,util.date_converter),
            'Date Publicly Available': ('date_publicly_available',False,None,
                                        util.date_converter),
            'Most Recent Update': ('date_updated',False,None,util.date_converter),
            'Is Restricted':('is_restricted',False,False)}
    
    # convert the labels to fleshed out dict's, with strategies for optional, 
    # default and converter
    column_definitions = \
        util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' 
                                    % (properties['column_label'],rows))
            logger.debug(str((
                'model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            protein = Protein(**initializer)
            protein.save()
            logger.info(str(('protein created: ', protein)))
            rows += 1
        except Exception, e:
            logger.error(str(("Invalid protein initializer: ", initializer, e)))
            raise
Esempio n. 34
0
def main(path):
    """
    Read in the OtherReagent
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name,
                          1])  # Note, skipping the header row by default

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'OR_ID':
        'lincs_id',
        'Facility ID': ('facility_id', True),
        'OR_Alternate_ID':
        'alternate_id',
        'OR_Primary_Name': ('name', True),
        'OR_Alternate_Name':
        'alternative_names',
        'OR_Role':
        'role',
        'OR_Reference':
        'reference',
        'Date Data Received':
        ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available':
        ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update':
        ('date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False)
    }

    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        r = util.make_row(row)
        dict = {}
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            reagent = OtherReagent(**initializer)
            reagent.save()
            logger.info(str(('OtherReagent created: ', reagent)))
            rows += 1

            # create a default batch - 0
            OtherReagentBatch.objects.create(reagent=reagent, batch_id=0)

        except Exception, e:
            logger.error(
                str(("Invalid OtherReagent initializer: ", initializer)))
            raise
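The Antibody, Cell, and OtherReagent loaders all finish by creating an implicit batch 0 for the newly created reagent. That shared step, written out as a generic sketch (the project itself simply repeats the objects.create call in each loader):

def create_default_batch(batch_model, reagent):
    """
    Create the implicit 'batch 0' record that the loaders attach to a new
    reagent, e.g. create_default_batch(OtherReagentBatch, reagent).
    Sketch of the shared pattern, not an existing project helper.
    """
    return batch_model.objects.create(reagent=reagent, batch_id=0)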
Esempio n. 35
0
def readDataColumns(path):
    # Read in the DataColumn Sheet
    sheetname = 'Data Columns'
    dataColumnSheet = iu.readtable([path, sheetname])

    # Lookup all of the field types of the Datacolumn table.  
    # These will be used to validate input type by converting on read
    _fields = util.get_fields(DataColumn)
    _typelookup = dict((f.name, iu.totype(f)) for f in _fields)
    
    # TODO: Use the import_utils methods here
    # TODO: compare and combine this with the fieldinformation entity
    labels = {'Worksheet Column':'worksheet_column',
              'Display Order':'display_order',
              'Name':'name',
              'Display Name':'display_name',
              'Data Type':'data_type',
              'Decimal Places':'precision',
              'Description':'description',
              'Replicate Number':'replicate',
              'Unit':'unit', 
              'Assay readout type':'readout_type',
              'Comments':'comments',
              'Protein HMS LINCS ID': 'protein', 
              'Cell HMS LINCS ID': 'cell'}

    # create an array of dict's, each dict defines a DataColumn    
    dataColumnDefinitions = []
    #Note we also allow a list of proteins
    # first the label row (it contains the worksheet column, it is unique)
    for v in dataColumnSheet.labels[1:]:
        dataColumnDefinitions.append({labels['Worksheet Column']:v})
        
    logger.debug(str(('========== datacolumns:',dataColumnDefinitions)))
    # for each row, create the dictionary entry in the dataColumnDefinitions
    for row in dataColumnSheet:
        rowAsUnicode = util.make_row(row)
        keyRead = rowAsUnicode[0]
        for i,cellText in enumerate(rowAsUnicode[1:]):
            try:
                for key,fieldName in labels.items():
                    # if one of the DataColumn fields, add it to the dict
                    if re.match(key,keyRead,re.M|re.I): 
                        if re.match('Protein HMS LINCS ID', keyRead, re.M|re.I):
                            facility_id = util.convertdata(cellText, int)
                            if facility_id:
                                dataColumnDefinitions[i][fieldName] = \
                                    Protein.objects.get(lincs_id=facility_id) 
                        elif re.match('Cell HMS LINCS ID', keyRead, re.M|re.I):
                            facility_id = util.convertdata(cellText, int)
                            if facility_id:
                                dataColumnDefinitions[i][fieldName] = \
                                    Cell.objects.get(facility_id=facility_id) 
                        else:
                            # Use the type from the fieldinformation table 
                            # to read in the data for each DC field
                            dataColumnDefinitions[i][fieldName] = \
                                util.convertdata(cellText,
                                                 _typelookup.get(fieldName, None)) 
                    else:
                    else:
                        logger.debug(str((
                            'Data Column definition not used: ', cellText)))
                        pass
            except Exception, e:
                logger.error(str(('Exception reading data for cell', i, cellText, e)))
                raise e
        logger.debug(str(("definitions: ", dataColumnDefinitions)) )
Esempio n. 36
0
def main(path):
    """
    Read in the cell batch info
    """
    sheet_name = 'Sheet1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = { 
              'Facility ID':('facility_id',True,None, lambda x: x[x.index('HMSL')+4:]),
              'CL_Batch_ID':('batch_id',True,None,lambda x:util.convertdata(x,int)),
              'CL_Provider_Name':'provider_name',
              'CL_Provider_Batch_ID':'provider_batch_id',
              'CL_Provider_Catalog_ID':'provider_catalog_id',
              'CL_Quality_Verification':'quality_verification',
              'CL_Transient_Modification': 'transient_modification',
              'Date Data Received':('date_data_received',False,None,util.date_converter),
              'Date Loaded': ('date_loaded',False,None,util.date_converter),
              'Date Publicly Available': ('date_publicly_available',False,None,util.date_converter),
              'Most Recent Update': ('date_updated',False,None,util.date_converter),
              }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)
    
    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (
                    properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            
            if model_field == 'facility_id':
                try:
                    cell = Cell.objects.get(facility_id=value)
                    initializer['reagent'] = cell
                except:
                    logger.error(str(("Cell not found", value, 'row',rows+start_row+2)))
                    raise
            else:
                initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            cell_batch = CellBatch(**initializer)
            cell_batch.save()
            logger.debug(str(('cell batch created:', cell_batch)))
            rows += 1
        except Exception, e:
            logger.error(str(( "Invalid CellBatch initializer: ", initializer, 
                'row', rows+start_row+2, e)))
            raise
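
Every loader in these examples declares its spreadsheet mapping the same way: a label maps either to a bare model field name or to a tuple ordered like the properties sequence ('model_field', 'required', 'default', 'converter'), and util.fill_in_column_definitions fills in whatever is missing. The helper's real implementation is not shown in this file; the function below is only a plausible sketch of that normalization, assuming that short entries default the remaining properties to None.

def fill_in_column_definitions_sketch(properties, column_definitions):
    """Normalize each entry to a full dict of properties (an assumption about
    what util.fill_in_column_definitions does, shown for illustration only)."""
    filled = {}
    for label, definition in column_definitions.items():
        if isinstance(definition, basestring):
            definition = (definition,)             # a bare string is the model field name
        entry = dict(zip(properties, definition))  # leading properties, in order
        for prop in properties:
            entry.setdefault(prop, None)           # unspecified trailing properties
        entry['column_label'] = label              # used in the error messages above
        filled[label] = entry
    return filled

properties = ('model_field', 'required', 'default', 'converter')
column_definitions = {
    'CL_Batch_ID': ('batch_id', True, None, int),
    'CL_Provider_Name': 'provider_name',
}
print fill_in_column_definitions_sketch(properties, column_definitions)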
Esempio n. 37
0
def read_data(book, col_to_dc_map, first_small_molecule_column, dataset):

    datarecord_batch = []
    save_interval = 1000

    logger.debug('read the Data sheet')
    data_sheet = book.sheet_by_name('Data')
    
    for i,label in enumerate(data_sheet.row_values(0)):
        logger.debug('find datasheet label %r:%r' % (colname(i), label))
        if label in meta_columns: 
            meta_columns[label] = i
            continue
    
    logger.debug('meta_columns: %s, datacolumnList: %s' 
        % (meta_columns, col_to_dc_map) )
    logger.debug('read the data sheet, save_interval: %d' % save_interval)
    loopStart = time.time()
    pointsSaved = 0
    rows_read = 0
    for i in xrange(data_sheet.nrows-1):
        current_row = i + 2
        row = data_sheet.row_values(i+1)    

        r = util.make_row(row)
        datarecord = DataRecord(dataset=dataset)
        
        if meta_columns['Control Type'] > -1: 
            datarecord.control_type = util.convertdata(
                r[meta_columns['Control Type']])

        datapoint_batch = []
        small_molecule_datapoint = None 
        for col,dc in col_to_dc_map.items():
            value = r[col]
            logger.debug(
                'reading column %r, %s, val: %r' % (colname(col), dc, value))
            value = value.strip()
            value = util.convertdata(value)
            if not value: 
                continue
            datapoint = _create_datapoint(dc, dataset, datarecord, value)
            datapoint_batch.append(datapoint)
            pointsSaved += 1
            if not small_molecule_datapoint and dc.data_type == 'small_molecule':
                small_molecule_datapoint = datapoint
                
        if meta_columns['Plate'] > -1:
            _read_plate_well(
                meta_columns['Plate'], r, current_row, datarecord,
                first_small_molecule_column,small_molecule_datapoint,
                datapoint_batch)
        
        
        datarecord_batch.append((datarecord, datapoint_batch))
        rows_read += 1
        
        if (rows_read % save_interval == 0):
            bulk_create_datarecords(datarecord_batch)
            logger.debug(
                'datarecord batch created, rows_read: %d, elapsed (s): %.2f'
                    % (rows_read, time.time()-loopStart))
            count = bulk_create_datapoints(datarecord_batch)
            logger.debug('datapoints created in batch: %d ' % count)
            datarecord_batch=[]

    bulk_create_datarecords(datarecord_batch)
    et = time.time()-loopStart
    logger.debug(
        'final datarecord batch created, rows_read: %d, elapsed (s): %.2f'
            % (rows_read, et))

    count = bulk_create_datapoints(datarecord_batch)
    logger.debug('created dps %d' % count )

    print 'Finished reading, rows_read: ', rows_read, ', points saved: ', pointsSaved
    print 'elapsed (s): ', et, ', avg per row: ', (et/rows_read if rows_read else 0)
    
    cleanup_unused_datacolumns(dataset)
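
read_data does not save row by row: it accumulates (DataRecord, [DataPoint, ...]) tuples and flushes them through bulk_create_datarecords and bulk_create_datapoints every save_interval rows, with one more flush for the final partial batch. A generic, stand-alone sketch of that batching pattern follows; the flush callable is a stand-in, not the project's API.

def save_in_batches(rows, build_record, flush, save_interval=1000):
    """Build one record per row and flush the accumulated batch every save_interval rows."""
    batch = []
    rows_read = 0
    for row in rows:
        batch.append(build_record(row))
        rows_read += 1
        if rows_read % save_interval == 0:
            flush(batch)
            batch = []
    if batch:
        flush(batch)  # final partial batch
    return rows_read

# usage sketch (names are illustrative):
# rows_read = save_in_batches(sheet_rows, make_datarecord, bulk_insert, save_interval=1000)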
Esempio n. 38
0
def main(import_file, file_directory, deploy_dir):
    """
    Read in the qc events for batches 
    - version 1 - for small molecule batches
    """
    sheet_name = "Sheet1"
    start_row = 0
    sheet = iu.readtable([import_file, sheet_name, start_row])  # Note, skipping the header row by default

    properties = ("model_field", "required", "default", "converter")
    column_definitions = {
        "facility_id": ("facility_id_for", True, None, lambda x: util.convertdata(x, int)),
        "salt_id": ("salt_id_for", False, None, lambda x: util.convertdata(x, int)),
        "batch_id": ("batch_id_for", True, None, lambda x: util.convertdata(x, int)),
        "QC event date": ("date", True, None, util.date_converter),
        "outcome": ("outcome", True),
        "comment": "comment",
        "is_restricted": ("is_restricted", False, False, util.bool_converter),
        "file1": "file1",
        "file2": "file2",
        "file3": "file3",
        "file4": "file4",
        "file5": "file5",
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(("cols: ", cols)))
    for row in sheet:
        r = util.make_row(row)
        # store each row in a dict
        _dict = {}
        for i, value in enumerate(r):
            if i not in cols:
                continue
            properties = cols[i]

            logger.debug(str(("read col: ", i, ", ", properties)))
            required = properties["required"]
            default = properties["default"]
            converter = properties["converter"]
            model_field = properties["model_field"]

            logger.debug(str(("raw value", value)))
            if converter != None:
                value = converter(value)
            if value == None:
                if default != None:
                    value = default
            if value == None and required == True:
                raise Exception("Field is required: %s, record: %d" % (properties["column_label"], rows))
            logger.debug(str(("model_field: ", model_field, ", value: ", value)))
            _dict[model_field] = value

        logger.debug(str(("dict: ", _dict)))

        files_to_attach = []
        for i in range(1, 6):  # file1 through file5, matching the column definitions
            filenameProp = "file%s" % i
            if _dict.get(filenameProp, None):
                fileprop = _dict[filenameProp]
                filepath = os.path.join(file_directory, fileprop)
                if not os.path.exists(filepath):
                    raise Exception(str(("file does not exist:", filepath, "row", rows + start_row)))
                filename = os.path.basename(filepath)
                relative_path = fileprop[: fileprop.index(filename)]

                # Move the file
                dest_dir = deploy_dir
                if not dest_dir:
                    dest_dir = settings.STATIC_AUTHENTICATED_FILE_DIR
                if not os.path.isdir(dest_dir):
                    raise Exception(str(("no such deploy directory, please create it", dest_dir)))
                if relative_path:
                    dest_dir = os.path.join(dest_dir, relative_path)
                    if not os.path.exists(dest_dir):
                        os.makedirs(dest_dir)
                deployed_path = os.path.join(dest_dir, filename)

                logger.debug(str(("deploy", filepath, deployed_path)))
                if os.path.exists(deployed_path):
                    os.remove(deployed_path)
                copy(filepath, deployed_path)
                if not os.path.isfile(deployed_path):
                    raise Exception(str(("could not deploy to", deployed_path)))
                else:
                    logger.debug(str(("successfully deployed to", deployed_path)))

                files_to_attach.append((filename, relative_path))

        initializer = None
        try:
            # create the qc record
            initializer = {
                key: _dict[key]
                for key in ["facility_id_for", "salt_id_for", "batch_id_for", "outcome", "comment", "date"]
            }
            qc_event = QCEvent(**initializer)
            qc_event.save()
            logger.debug(str(("saved", qc_event)))

            # create attached file records
            for (filename, relative_path) in files_to_attach:
                initializer = {
                    "qc_event": qc_event,
                    "filename": filename,
                    "relative_path": relative_path,
                    "is_restricted": _dict["is_restricted"],
                }
                qc_attached_file = QCAttachedFile(**initializer)
                qc_attached_file.save()
                logger.debug(str(("created qc attached file", qc_attached_file)))

            rows += 1

        except Exception, e:
            logger.error(str(("Invalid initializer: ", initializer, "row", rows + start_row + 2, e)))
            raise
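
The attached-file handling above resolves each file name against file_directory and then copies it under the deploy directory, preserving any relative sub-path contained in the spreadsheet cell. The stand-alone sketch below condenses that deployment step; it assumes copy is shutil.copy (as the code above appears to use) and simply creates missing destination directories rather than distinguishing the base deploy directory from its sub-paths.

import os
from shutil import copy

def deploy_attached_file(source_dir, fileprop, deploy_dir):
    """Copy one attached file under deploy_dir, preserving its relative sub-path.

    Returns (filename, relative_path) for the QCAttachedFile record.
    """
    filepath = os.path.join(source_dir, fileprop)
    if not os.path.exists(filepath):
        raise Exception('file does not exist: %s' % filepath)
    filename = os.path.basename(filepath)
    relative_path = fileprop[:fileprop.index(filename)]
    dest_dir = os.path.join(deploy_dir, relative_path) if relative_path else deploy_dir
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    deployed_path = os.path.join(dest_dir, filename)
    if os.path.exists(deployed_path):
        os.remove(deployed_path)
    copy(filepath, deployed_path)
    return filename, relative_path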
Esempio n. 39
0
def main(path):
    """
    Read in the OtherReagent
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 1]) # Note, skipping the header row by default

    properties = ('model_field','required','default','converter')
    column_definitions = {
        'OR_ID': 'lincs_id',
        'Facility ID': ('facility_id', True),
        'OR_Alternate_ID': 'alternate_id',
        'OR_Primary_Name': ('name', True),
        'OR_Alternate_Name': 'alternative_names',
        'OR_Role': 'role',
        'OR_Reference': 'reference',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False)}

    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(properties,column_definitions)
    
    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0    
    logger.debug(str(('cols: ' , cols)))
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            # Todo, refactor to a method
            logger.debug(str(('raw value', value)))
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' % (properties['column_label'],rows))
            logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
            initializer[model_field] = value
        try:
            logger.debug(str(('initializer: ', initializer)))
            reagent = OtherReagent(**initializer)
            reagent.save()
            logger.info(str(('OtherReagent created: ', reagent)))
            rows += 1
        except Exception, e:
            logger.error(str(( "Invalid OtherReagent initializer: ", initializer)))
            raise
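
The block marked '# Todo, refactor to a method' above (apply the converter, fall back to the default, then enforce required) is repeated in nearly every loader in this file. One possible extraction of that logic, offered as a suggestion rather than as existing project code:

def resolve_value(raw_value, properties, record_number):
    """Apply one column's converter, default, and required rules to a single cell."""
    value = raw_value
    converter = properties['converter']
    if converter is not None:
        value = converter(value)
    if value is None and properties['default'] is not None:
        value = properties['default']
    if value is None and properties['required']:
        raise Exception('Field is required: %s, record: %d'
                        % (properties['column_label'], record_number))
    return value

# Inside the row loop, the repeated block could then become:
# initializer[properties['model_field']] = resolve_value(value, properties, rows)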
Esempio n. 40
0
def main(path):
    """
    Read in the Antibody Batches
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 0])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'AR_Center_Specific_ID': ('antibody_facility_id', True, None,
                                  lambda x: x[x.index('HMSL') + 4:]),
        'AR_Batch_ID': ('batch_id', True, None,
                        lambda x: util.convertdata(x, int)),
        'AR_Provider_Name': 'provider_name',
        'AR_Provider_Catalog_ ID': 'provider_catalog_id',
        'AR_Provider_Batch_ID': 'provider_batch_id',
        'AR_Antibody_Purity': 'antibody_purity',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
    }

    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug('cols: %s' % cols)
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug('read col: %d: %s' % (i, properties))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug('raw value %r' % value)
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))

            logger.debug('model_field: %s, converted value %r' %
                         (model_field, value))
            initializer[model_field] = value
        try:
            logger.debug('initializer: %s' % initializer)

            antibody_facility_id = initializer.pop('antibody_facility_id',
                                                   None)
            if antibody_facility_id:
                try:
                    antibody = Antibody.objects.get(
                        facility_id=antibody_facility_id)
                    initializer['reagent'] = antibody
                except ObjectDoesNotExist, e:
                    logger.error(
                        'AR_Center_Specific_ID: "%s" does not exist, row: %d' %
                        (antibody_facility_id, rows))
            antibody_batch = AntibodyBatch(**initializer)
            antibody_batch.save()
            logger.info('antibody batch created: %s' % antibody_batch)
            rows += 1
        except Exception, e:
            logger.error("Invalid antibody_batch initializer: %s" %
                         initializer)
            raise
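
Several of these loaders strip the 'HMSL' prefix from center-specific IDs with the one-line converter lambda x: x[x.index('HMSL') + 4:]. The named equivalent below only illustrates what that slice does, including the ValueError that str.index raises when the prefix is absent:

def strip_hmsl_prefix(value):
    """Return everything after the 'HMSL' prefix, e.g. 'HMSL10001' -> '10001'.

    str.index raises ValueError if 'HMSL' does not occur in the value.
    """
    return value[value.index('HMSL') + 4:]

print strip_hmsl_prefix('HMSL10001')      # 10001
print strip_hmsl_prefix('HMSL10001-101')  # 10001-101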
Esempio n. 41
0
def main(path):
    """
    Read in the Antibody
    """
    sheet_name = 'Sheet1'
    sheet = iu.readtable([path, sheet_name, 0])

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'AR_Name': ('name', True),
        'AR_LINCS_ID': 'lincs_id',
        'AR_Alternative_Name': 'alternative_names',
        'AR_Center_Specific_ID': ('facility_id', True, None,
                                  lambda x: x[x.index('HMSL') + 4:]),
        'AR_Clone_Name': 'clone_name',
        'AR_RRID': 'rrid',
        'AR_Antibody_Type': 'type',
        'target_protein_lincs_id': ('target_protein_lincs_id', False, None,
                                    lambda x: x[x.index('HMSL') + 4:] if x else None),
        'AR_Non-Protein_Target': 'non_protein_target_name',
        'AR_Target_Organism': 'target_organism',
        'AR_Immunogen': 'immunogen',
        'AR_Immunogen_Sequence': 'immunogen_sequence',
        'AR_Antibody_Species': 'species',
        'AR_Antibody_Clonality': 'clonality',
        'AR_Antibody_Isotype': 'isotype',
        'AR_Antibody_Production_Source_Organism': 'source_organism',
        'AR_Antibody_Production_Details': 'production_details',
        'AR_Antibody_Labeling': 'labeling',
        'AR_Antibody_Labeling_Details': 'labeling_details',
        'AR_Relevant_Citations': 'relevant_citations',
        'Date Data Received': ('date_data_received', False, None, util.date_converter),
        'Date Loaded': ('date_loaded', False, None, util.date_converter),
        'Date Publicly Available': ('date_publicly_available', False, None, util.date_converter),
        'Most Recent Update': ('date_updated', False, None, util.date_converter),
        'Is Restricted': ('is_restricted', False, False, util.bool_converter)
    }

    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug('cols: %s' % cols)
    for row in sheet:
        logger.debug('row %s - %s' % (rows, row))
        r = util.make_row(row)
        initializer = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug('read col: %d: %s' % (i, properties))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug('raw value %r' % value)
            if (value == None or value == 'None'):
                value = None
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))
            if (value and converter != None):
                value = converter(value)

            logger.debug('model_field: %s, converted value %r' %
                         (model_field, value))
            initializer[model_field] = value
        try:
            logger.debug('row: %s, initializer: %s' % (rows, initializer))

            target_protein_lincs_id = initializer.pop(
                'target_protein_lincs_id', None)
            if target_protein_lincs_id:
                try:
                    target_protein = Protein.objects.get(
                        lincs_id=target_protein_lincs_id)
                    initializer['target_protein'] = target_protein
                except ObjectDoesNotExist, e:
                    logger.error(
                        'target_protein_lincs_id "%s" does not exist, row: %d'
                        % (target_protein_lincs_id, rows))
            antibody = Antibody(**initializer)
            antibody.save()
            logger.info('antibody created: %s' % antibody)
            rows += 1

            # create a default batch - 0
            AntibodyBatch.objects.create(reagent=antibody, batch_id=0)

        except Exception, e:
            logger.error("Invalid antibody initializer: %s" % initializer)
            raise
Esempio n. 43
0
def main(import_file, file_directory, deploy_dir):
    """
    Read in the qc events for batches 
    - version 1 - for small molecule batches
    """
    sheet_name = 'Sheet1'
    start_row = 0
    sheet = iu.readtable([import_file, sheet_name, start_row])  # Note, skipping the header row by default

    properties = ('model_field', 'required', 'default', 'converter')
    column_definitions = {
        'facility_id': ('facility_id_for', True, None,
                        lambda x: util.convertdata(x, int)),
        'salt_id': ('salt_id_for', False, None,
                    lambda x: util.convertdata(x, int)),
        'batch_id': ('batch_id_for', True, None,
                     lambda x: util.convertdata(x, int)),
        'QC event date': ('date', True, None, util.date_converter),
        'outcome': ('outcome', True),
        'comment': 'comment',
        'is_restricted': ('is_restricted', False, False, util.bool_converter),
        'file1': 'file1',
        'file2': 'file2',
        'file3': 'file3',
        'file4': 'file4',
        'file5': 'file5',
    }
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    column_definitions = util.fill_in_column_definitions(
        properties, column_definitions)

    # create a dict mapping the column ordinal to the proper column definition dict
    cols = util.find_columns(column_definitions, sheet.labels)

    rows = 0
    logger.debug(str(('cols: ', cols)))
    for row in sheet:
        r = util.make_row(row)
        # store each row in a dict
        _dict = {}
        for i, value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug(str(('read col: ', i, ', ', properties)))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug(str(('raw value', value)))
            if (converter != None):
                value = converter(value)
            if (value == None):
                if (default != None):
                    value = default
            if (value == None and required == True):
                raise Exception('Field is required: %s, record: %d' %
                                (properties['column_label'], rows))
            logger.debug(
                str(('model_field: ', model_field, ', value: ', value)))
            _dict[model_field] = value

        logger.debug(str(('dict: ', _dict)))

        files_to_attach = []
        for i in range(1, 6):  # file1 through file5, matching the column definitions
            filenameProp = 'file%s' % i
            if _dict.get(filenameProp, None):
                fileprop = _dict[filenameProp]
                filepath = os.path.join(file_directory, fileprop)
                if not os.path.exists(filepath):
                    raise Exception(
                        str(('file does not exist:', filepath, 'row',
                             rows + start_row)))
                filename = os.path.basename(filepath)
                relative_path = fileprop[:fileprop.index(filename)]

                # Move the file
                dest_dir = deploy_dir
                if not dest_dir:
                    dest_dir = settings.STATIC_AUTHENTICATED_FILE_DIR
                if not os.path.isdir(dest_dir):
                    raise Exception(
                        str(('no such deploy directory, please create it',
                             dest_dir)))
                if relative_path:
                    dest_dir = os.path.join(dest_dir, relative_path)
                    if not os.path.exists(dest_dir):
                        os.makedirs(dest_dir)
                deployed_path = os.path.join(dest_dir, filename)

                logger.debug(str(('deploy', filepath, deployed_path)))
                if os.path.exists(deployed_path):
                    os.remove(deployed_path)
                copy(filepath, deployed_path)
                if not os.path.isfile(deployed_path):
                    raise Exception(str(
                        ('could not deploy to', deployed_path)))
                else:
                    logger.debug(
                        str(('successfully deployed to', deployed_path)))

                files_to_attach.append((filename, relative_path))

        initializer = None
        try:
            # create the qc record
            initializer = {
                key: _dict[key]
                for key in [
                    'facility_id_for', 'salt_id_for', 'batch_id_for',
                    'outcome', 'comment', 'date'
                ]
            }
            qc_event = QCEvent(**initializer)
            qc_event.save()
            logger.debug(str(('saved', qc_event)))

            # create attached file records
            for (filename, relative_path) in files_to_attach:
                initializer = {
                    'qc_event': qc_event,
                    'filename': filename,
                    'relative_path': relative_path,
                    'is_restricted': _dict['is_restricted']
                }
                qc_attached_file = QCAttachedFile(**initializer)
                qc_attached_file.save()
                logger.debug(
                    str(('created qc attached file', qc_attached_file)))

            rows += 1

        except Exception, e:
            logger.error(
                str(("Invalid initializer: ", initializer, 'row',
                     rows + start_row + 2, e)))
            raise
Esempio n. 44
0
def main(path):
    sheet_name = 'sheet 1'
    start_row = 1
    sheet = iu.readtable([path, sheet_name, start_row])

    properties = ('model_field','required','default','converter')
    column_definitions = { 
        'facility_id': (
            'facility_id',True,None, lambda x: util.convertdata(x,int)),
        'facility_batch_id':(
            'batch_id',True,None, lambda x: util.convertdata(x,int)),
        'provider': ('provider_name',False),
        'provider_catalog_id':'provider_catalog_id',
        'provider_sample_id':'provider_batch_id',
        'Date Data Received':(
            'date_data_received',False,None,util.date_converter),
        'Date Loaded': ('date_loaded',False,None,util.date_converter),
        'Date Publicly Available': (
            'date_publicly_available',False,None,util.date_converter),
        'Most Recent Update': (
            'date_updated',False,None,util.date_converter),
        }
    column_definitions = util.fill_in_column_definitions(
        properties,column_definitions)
    
    cols = util.find_columns(column_definitions, sheet.labels,
        all_sheet_columns_required=False)
    
    rows = 0    
    logger.debug('cols: %s' % cols)
    for row in sheet:
        r = util.make_row(row)
        initializer = {}
        for i,value in enumerate(r):
            if i not in cols: continue
            properties = cols[i]

            logger.debug('read col: %d: %s' % (i,properties))
            required = properties['required']
            default = properties['default']
            converter = properties['converter']
            model_field = properties['model_field']

            logger.debug('raw value %r' % value)
            if(converter != None):
                value = converter(value)
            if(value == None ):
                if( default != None ):
                    value = default
            if(value == None and  required == True):
                raise Exception('Field is required: %s, record: %d' 
                    % (properties['column_label'],rows))

            logger.debug('model_field: %s, converted value %r'
                % (model_field, value) )
            initializer[model_field] = value
        try:
            logger.debug('initializer: %s' % initializer)
            
            facility_id = initializer.pop('facility_id',None)
            try:
                other_reagent = OtherReagent.objects.get(facility_id=facility_id)
                initializer['reagent'] = other_reagent
            except ObjectDoesNotExist, e:
                logger.error('facility_id: "%s" does not exist, row: %d'
                    % (facility_id, rows))
            batch = OtherReagentBatch(**initializer)
            batch.save()
            logger.debug('batch created: %s', batch)
            rows += 1
        except Exception, e:
            logger.error("Invalid other_reagent_batch initializer: %s" % initializer)
            raise