for key,value in mappingColumnDict.items(): if(value != -1): found=True if(not found): raise Exception('at least one of: ' + str(mappingColumnDict.keys()) + ' must be defined and used in the Data sheet.') # Read the Datasheet, create DataPoint values for mapped column in each row logger.debug(str(('now read rows, save_interval:', save_interval))) loopStart = time.time() pointsSaved = 0 rowsRead = 0 for row in dataSheet: current_row = rowsRead+2 r = util.make_row(row) dataRecord = DataRecord(dataset=dataset ) map_column = mappingColumnDict['Small Molecule Batch'] mapped = False if(map_column > -1): _read_small_molecule_batch(map_column,r,current_row,dataRecord) map_column = mappingColumnDict['Plate'] if(map_column > -1): _read_plate_well(map_column,r,current_row, dataRecord) map_column = mappingColumnDict['Cell'] if(map_column > -1): _read_cell(map_column,r,current_row,dataRecord) map_column = mappingColumnDict['Antibody'] if(map_column > -1): _read_antibody(map_column,r,current_row,dataRecord) map_column = mappingColumnDict['OtherReagent'] if(map_column > -1):
found=False for key,value in mappingColumnDict.items(): if(value != -1): found=True if(not found): raise Exception('at least one of: ' + str(mappingColumnDict.keys()) + ' must be defined and used in the Data sheet.') # Read in the Data sheet, create DataPoint values for mapped column in each row logger.info(str(('data sheet columns identified, read rows, save_interval:', save_interval))) loopStart = time.time() pointsSaved = 0 rowsRead = 0 for row in dataSheet: current_row = rowsRead+2 r = util.make_row(row) dataRecord = DataRecord(dataset=dataset ) map_column = mappingColumnDict['Small Molecule Batch'] mapped = False if(map_column > -1): try: value = util.convertdata(r[map_column].strip()) if(value != None and value != '' ): value = value.split("-") if len(value) < 2: raise Exception('Small Molecule (Batch) format is #####-###(-#) **Note that (batch) is optional') x = value[0] facility = util.convertdata(x,int) salt = value[1] try: dataRecord.smallmolecule = SmallMolecule.objects.get(facility_id=facility, salt_id=salt) except Exception, e: logger.error(str(('could not locate small molecule:', facility)))
def read_data(book, col_to_dc_map, first_small_molecule_column, dataset):
    """Read the 'Data' sheet of *book* and bulk-create its records.

    The first sheet row is treated as the header: every label that is a key
    of the module-level ``meta_columns`` dict has its column index recorded
    there.  Each following row yields one ``DataRecord`` plus one datapoint
    per non-empty mapped column.  Records are written in batches of 1000
    rows, with a final flush for the remainder, after which
    ``cleanup_unused_datacolumns(dataset)`` is called.

    Args:
        book: workbook object exposing ``sheet_by_name`` (presumably an xlrd
            workbook -- confirm against the caller).
        col_to_dc_map: dict mapping a sheet column index to the data column
            object passed to ``_create_datapoint``; each value must expose a
            ``data_type`` attribute.
        first_small_molecule_column: passed through unchanged to
            ``_read_plate_well``.
        dataset: dataset the created records and datapoints belong to.

    Returns:
        None.  A summary is printed to stdout.
    """
    datarecord_batch = []
    save_interval = 1000

    logger.debug('read the Data sheet')
    data_sheet = book.sheet_by_name('Data')

    # Record where each known meta column sits in the header row.
    # NOTE(review): meta_columns is module-level state, so indexes found by a
    # previous call remain if the label is absent from this sheet -- confirm
    # whether callers reset it.
    for col_index, label in enumerate(data_sheet.row_values(0)):
        logger.debug('find datasheet label %r:%r', colname(col_index), label)
        if label in meta_columns:
            meta_columns[label] = col_index

    logger.debug('meta_columns: %s, datacolumnList: %s',
                 meta_columns, col_to_dc_map)
    logger.debug('read the data sheet, save_interval: %d', save_interval)

    loopStart = time.time()
    pointsSaved = 0
    rows_read = 0

    # Loop invariants: resolve them once instead of once per row.
    col_to_dc_items = col_to_dc_map.items()
    control_type_col = meta_columns['Control Type']
    plate_col = meta_columns['Plate']

    for row_index in xrange(data_sheet.nrows - 1):
        # Sheet row 0 is the header, and rows are reported 1-based.
        current_row = row_index + 2
        r = util.make_row(data_sheet.row_values(row_index + 1))
        datarecord = DataRecord(dataset=dataset)

        if control_type_col > -1:
            datarecord.control_type = util.convertdata(r[control_type_col])

        datapoint_batch = []
        small_molecule_datapoint = None
        # A distinct index name keeps the row index from being clobbered.
        for col_index, dc in col_to_dc_items:
            value = r[col_index]
            logger.debug('reading column %r, %s, val: %r',
                         colname(col_index), dc, value)
            value = util.convertdata(value.strip())
            if not value:
                continue
            datapoint = _create_datapoint(dc, dataset, datarecord, value)
            datapoint_batch.append(datapoint)
            pointsSaved += 1
            # Remember only the first small molecule datapoint of the row.
            if (small_molecule_datapoint is None
                    and dc.data_type == 'small_molecule'):
                small_molecule_datapoint = datapoint

        if plate_col > -1:
            _read_plate_well(
                plate_col, r, current_row, datarecord,
                first_small_molecule_column, small_molecule_datapoint,
                datapoint_batch)

        datarecord_batch.append((datarecord, datapoint_batch))
        rows_read += 1

        if rows_read % save_interval == 0:
            bulk_create_datarecords(datarecord_batch)
            # time.time() is in seconds; scale so the "(ms)" label is true.
            logger.debug(
                'datarecord batch created, rows_read: %d , time (ms): %d',
                rows_read, (time.time() - loopStart) * 1000)
            count = bulk_create_datapoints(datarecord_batch)
            logger.debug('datapoints created in batch: %d ', count)
            datarecord_batch = []

    # Flush whatever is left over after the last full batch.
    bulk_create_datarecords(datarecord_batch)
    elapsed = time.time() - loopStart
    logger.debug(
        'final datarecord batch created, rows_read: %d, time (ms): %d',
        rows_read, elapsed * 1000)

    count = bulk_create_datapoints(datarecord_batch)
    logger.debug('created dps %d', count)

    # A sheet holding only the header row must not fail on the average.
    average = elapsed / rows_read if rows_read else 0.0
    # Single-string form prints the same text as the former print statements.
    print('Finished reading, rows_read:  %s , points Saved:  %s'
          % (rows_read, pointsSaved))
    print('elapsed:  %s avg:  %s' % (elapsed, average))

    cleanup_unused_datacolumns(dataset)
logger.debug('meta_columns: %s, datacolumnList: %s' % (meta_columns, col_to_dc_map)) logger.debug('read the data sheet, save_interval: %d' % save_interval) loopStart = time.time() pointsSaved = 0 rows_read = 0 col_to_dc_items = col_to_dc_map.items() for i in xrange(data_sheet.nrows - 1): current_row = i + 2 row = data_sheet.row_values(i + 1) r = util.make_row(row) datarecord = DataRecord(dataset=dataset) if meta_columns['Control Type'] > -1: datarecord.control_type = util.convertdata( r[meta_columns['Control Type']]) datapoint_batch = [] small_molecule_datapoint = None for i, dc in col_to_dc_items: value = r[i] logger.debug('reading column %r, %s, val: %r' % (colname(i), dc, value)) value = value.strip() value = util.convertdata(value) if not value: continue
logger.debug('meta_columns: %s, datacolumnList: %s' % (meta_columns, col_to_dc_map) ) logger.debug('read the data sheet, save_interval: %d' % save_interval) loopStart = time.time() pointsSaved = 0 rows_read = 0 col_to_dc_items = col_to_dc_map.items() for i in xrange(data_sheet.nrows-1): current_row = i + 2 row = data_sheet.row_values(i+1) r = util.make_row(row) datarecord = DataRecord(dataset=dataset) if meta_columns['Control Type'] > -1: datarecord.control_type = util.convertdata( r[meta_columns['Control Type']]) datapoint_batch = [] small_molecule_datapoint = None for i,dc in col_to_dc_items: value = r[i] logger.debug( 'reading column %r, %s, val: %r' % (colname(i), dc, value)) value = value.strip() value = util.convertdata(value) if not value: continue