def HTS_GroupDataExtractMySql(dataSelector,table): from HTS_dataDict import HTS_dataDict import xlrd as xl import numpy as np import matplotlib.pyplot as plt from properties import Properties from dbconnect import DBConnect, UniqueImageClause, UniqueObjectClause, GetWhereClauseForImages, GetWhereClauseForObjects, image_key_columns, object_key_columns import sqltools as sql p = Properties.getInstance() db = DBConnect.getInstance() dependentDataValues = False independentDataValues = False # def buildquery(self, table): # q = 'SELECT ' + self.independentVar + ', '+ self.dependentVar +' FROM ' + table # q2= [' WHERE '+ y[1] +' LIKE ' +y[0] for y in group.pairs] # return q + ''.join(q2) returnDict = HTS_dataDict(dataSelector) #for iFile in range(nFiles): dataSelector.clearAllIndici print('HTS_GroupDataExtract: using sheet %s for %s\n',table) query ='SELECT `' + dataSelector.independentVariable + '` FROM ' + '`' +table+'`' print query dataSelector.findValidIndiciFromDataColumn(dataSelector.independentVariable,db.execute(query)) query = 'SELECT `' + dataSelector.dependentVariable + '` FROM ' + '`' +table+'`' print query dataSelector.findValidIndiciFromDataColumn(dataSelector.dependentVariable,db.execute(query)) print dependentDataValues, independentDataValues, "variables" if not dependentDataValues: print 'HTS_GroupDataExtract: dependent variable not found in %s', if not independentDataValues: print 'HTS_GroupDataExtract: independent variable not found in %s' # print dataSelector.extractDataFromGroups(dependentDataValues,independentDataValues) dataSelector.table = table returnDict.dict[table] = dataSelector.extractDataFromGroupsMysql(dependentDataValues,independentDataValues) # if nFiles > 1: # # Get data from the selector # nGroups = dataSelector.nGroups # groupKeys = dataSelector.getGroupDescriptions() # # Create the combined group # combinedData = {} # tmpStruct = {} # tmpStruct['dependentData'] = np.array([]) # tmpStruct['independentData'] = np.array([]) # for iGrp in range(nGroups): # combinedData[groupKeys[iGrp]] = tmpStruct # for iFile in range(nFiles): # #print('the key = %s\n') # fileData = returnDict.dict[dataFiles[iFile]] # for iGrp in range(nGroups): # grpKey = groupKeys[iGrp] # #combDepData = combinedData[grpKey]['dependentData'] # fileDepData = fileData[grpKey]['dependentData'] # tmpStruct['dependentData'] = np.concatenate((tmpStruct['dependentData'],fileDepData[:])) # #combIndData = [combinedData[grpKey]['independentData']] # fileIndData = fileData[grpKey]['independentData'] # tmpStruct['independentData'] = np.concatenate((tmpStruct['independentData'],fileIndData[:])) # combinedData[grpKey] = tmpStruct # # returnDict.dict['combinedData'] = combinedData return returnDict
def HTS_GroupDataExtract(dataSelector,dataFiles): from HTS_dataDict import HTS_dataDict import xlrd as xl import numpy as np dependentDataValues = False independentDataValues = False returnDict = HTS_dataDict(dataSelector) nFiles = len(dataFiles) for iFile in range(nFiles): dataSelector.clearAllIndici dataFile = dataFiles[iFile] book = xl.open_workbook(dataFile) sheetName = '' for sheet in book.sheet_names(): if sheet == 'AutoCreate': sheetName = 'AutoCreate' sheet = book.sheet_by_name(sheetName) if sheet=='Math': sheetName = 'Math' sheet = book.sheet_by_name(sheetName) if sheet== 'Corrected': sheetName = 'Corrected' sheet = book.sheet_by_name(sheetName) if sheetName == '': print ('HTS_GroupDataExtract: %s does not have a recognized sheet name',dataFile) print('HTS_GroupDataExtract: using sheet %s for %s\n',sheetName,dataFile) headerRows = zip(sheet.row_values(0), sheet.row_values(1)) for pair in range(len(headerRows)): headerRows[pair] = headerRows[pair][0] + headerRows[pair][1] for col in range(len(headerRows)): #print headerRows[col], dataSelector.independentVariable if headerRows[col].replace(' ','') == dataSelector.independentVariable.replace(' ', ''): independentDataValues = sheet.col_values(col,2) if headerRows[col] == dataSelector.dependentVariable: dependentDataValues = sheet.col_values(col,2) dataSelector.findValidIndiciFromDataColumn(headerRows[col],sheet.col_values(col,2)) if not dependentDataValues: print('HTS_GroupDataExtract: dependent variable not found in %s',dataFile) if not independentDataValues: print('HTS_GroupDataExtract: independent variable not found in %s',dataFile) # print dataSelector.extractDataFromGroups(dependentDataValues,independentDataValues) returnDict.dict[dataFile] = dataSelector.extractDataFromGroups(dependentDataValues,independentDataValues) if nFiles > 1: # Get data from the selector nGroups = dataSelector.nGroups groupKeys = dataSelector.getGroupDescriptions() # Create the combined group combinedData = {} tmpStruct = {} tmpStruct['dependentData'] = np.array([]) tmpStruct['independentData'] = np.array([]) for iGrp in range(nGroups): combinedData[groupKeys[iGrp]] = tmpStruct for iFile in range(nFiles): #print('the key = %s\n') fileData = returnDict.dict[dataFiles[iFile]] for iGrp in range(nGroups): grpKey = groupKeys[iGrp] #combDepData = combinedData[grpKey]['dependentData'] fileDepData = fileData[grpKey]['dependentData'] tmpStruct['dependentData'] = np.concatenate((tmpStruct['dependentData'],fileDepData[:])) #combIndData = [combinedData[grpKey]['independentData']] fileIndData = fileData[grpKey]['independentData'] tmpStruct['independentData'] = np.concatenate((tmpStruct['independentData'],fileIndData[:])) combinedData[grpKey] = tmpStruct returnDict.dict['combinedData'] = combinedData return returnDict