def __fetchQueryFromLocation(self, groupid, type):
    """Load this report's SQL text from the analytics library into ``self.query``.

    The file read is ``<analytics_lib>/reports/<groupid>/<self.name>.sql``,
    where ``<analytics_lib>`` is whatever ``utils.get_analytics_lib()`` returns.

    Args:
        groupid: Group identifier; converted with ``str()`` and stripped
            before being used as the directory name.
        type: Currently unused (see the TODO below). The name shadows the
            builtin but is kept so existing callers keep working.

    Raises:
        NameError: If the report file cannot be opened or read.
    """
    # TODO: Case Type for FileSystem/MySQL/URL
    try:
        report_path = (utils.get_analytics_lib() + "/reports/" +
                       str(groupid).strip() + "/" + self.name + ".sql")
        # The context manager closes the handle even when read() fails;
        # the previous explicit close() was skipped on error.
        with open(report_path, 'r') as report_file:
            self.query = report_file.read()
    except IOError:
        raise NameError("The report trying to be fetch is not available.")
def buildQuery(self, groupid, type='csv', dsrange=None, paramOrderList=None,
               ignore_aggregates=False, leftKeyTable=None, leftKey=None,
               eventList=None):
    """Build the Hive ``select TRANSFORM(...)`` statement for this report.

    The events in ``self.events`` are combined into the ``from`` clause:
    a single event becomes an aliased sub-query, two or more events are
    joined left to right through ``self.joinEvents``.

    Args:
        groupid: Group identifier forwarded to ``buildSubQuery`` / ``joinEvents``.
        type: Output format. ``'csv'`` registers ``hive2csv.py`` and streams
            through it; any other value uses the comma-delimited ``/bin/cat``
            transform. (Name shadows the builtin; kept for callers.)
        dsrange: Two-item date range forwarded unchanged to the events.
            ``None`` (the default) means ``[None, <today as YYYY-MM-DD>]``,
            computed on every call.
        paramOrderList: List of selected column expressions, in output order.
            Required in practice (it is joined with ``','``). When
            ``leftKeyTable`` is given and aggregates are not ignored, the
            group-by keys are inserted into this list in place.
        ignore_aggregates: When ``False`` a ``group by`` clause is appended.
        leftKeyTable: Name of the event that must be the left-most table.
            It is moved to (or created at) the front of ``self.events``.
        leftKey: Key column added to the left-most event's parameters and
            to the group-by keys.
        eventList: Unused; kept for interface compatibility.

    Returns:
        The complete query string.

    Raises:
        ValueError: If there are no events to query.
    """
    if dsrange is None:
        # Built per call: a default evaluated in the signature would freeze
        # "today" at import time and share one list between all calls.
        dsrange = [None, time.strftime('%Y-%m-%d')]

    # Make sure the requested left-most table is the first event.
    if leftKeyTable is not None:
        # Locals are deliberately not called `event`: that name must keep
        # referring to the `event` module for the fallback below.
        matched = None
        for candidate in self.events:
            if candidate.name == leftKeyTable:
                matched = candidate

        if matched is not None:
            matched.addToParameters(leftKey)
            if self.events[0].name != leftKeyTable:
                # Move the match to the front of the list.
                self.events.insert(
                    0, self.events.pop(self.events.index(matched)))
        else:
            # Nothing found, so the table must be forced into the list.
            # NOTE(review): relies on the `event` module (providing `Event`)
            # being imported at file level -- confirm.
            leftKeyEvent = event.Event(leftKeyTable, leftKeyTable,
                                       'uuid', 'uuid')
            leftKeyEvent.addToParameters(leftKey)
            self.events.insert(0, leftKeyEvent)

    events = self.events
    if not events:
        raise ValueError("buildQuery requires at least one event.")

    selected_params = ','.join(paramOrderList)

    # Default transformation: comma separated columns passed through cat.
    transform = "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n' USING '/bin/cat' AS dummy"
    # Any FILE/JAR that has to be registered before the transform can run.
    requires = ""
    if type == 'csv':
        requires = "add file " + utils.get_analytics_lib() + "/python_streaming/hive2csv.py; "
        transform = "USING 'hive2csv.py' AS dummy"

    if len(events) == 1:
        only_event = events[0]
        result = ("(" + only_event.buildSubQuery(groupid, dsrange) + ") " +
                  only_event.name)
    else:
        # Join the first two events, then fold every remaining event onto
        # the running result. Stepping through the list in pairs would
        # overwrite (and thereby drop) earlier joins for four or more events.
        result = self.joinEvents(groupid, events[0], events[1], dsrange)
        for extra_event in events[2:]:
            result = self.joinEvents(groupid, result, extra_event, dsrange)

    result = (requires + "select TRANSFORM(" + selected_params + ") " +
              transform + " from " + result + " ")

    # `== False` is kept on purpose: values such as None must not enable
    # grouping, which `not ignore_aggregates` would do.
    wants_group_by = (ignore_aggregates == False)  # noqa: E712

    if leftKeyTable is not None and wants_group_by:
        # Group by the left-most table's key and uuid in addition to the
        # selected parameters. This mutates the caller's list.
        left_key_column = str(leftKeyTable) + "." + leftKey
        left_uuid_column = str(leftKeyTable) + ".uuid"
        if left_key_column not in paramOrderList:
            paramOrderList.insert(0, left_key_column)
        if left_uuid_column not in paramOrderList:
            paramOrderList.insert(0, left_uuid_column)
        result += "group by " + ','.join(paramOrderList)
    elif wants_group_by:
        result += "group by " + selected_params

    return result
def group_columns(groupid):
    """Collect the selectable column names of every event file of a group.

    Every non-hidden file in ``<analytics_lib>/events/<groupid>`` is read
    line by line; the text before the first comma of a line is the column
    name. Names are normalised as follows:

    * all digits (``"5"``)            -> ``"c_5"``
    * starts with a number (``"5.1"``) -> ``"c_5_1"``, and the parent
      column ``"c_5"`` is listed as well so that all child columns can be
      selected through it
    * anything else is kept unchanged

    Args:
        groupid: Name of the group directory (a string).

    Returns:
        dict mapping each file name to its list of column names, in file
        order and without duplicates. Files without any line, and files
        that cannot be opened, have no entry.
    """
    column_map = {}
    events_dir = utils.get_analytics_lib() + "/events/" + groupid
    # Compiled once instead of once per line.
    leading_number = re.compile(r'\d+(\.\d+)?')

    for in_file in os.listdir(events_dir):
        # Existing progress/debug output, kept as is.
        print(in_file)

        # Ignore .svn and other hidden folders/files.
        if in_file.startswith('.'):
            continue

        try:
            with open(events_dir + "/" + in_file, 'r') as event_file:
                for line in event_file:
                    col = line.strip().split(",", 1)[0]
                    col_generic = None

                    if col.isdigit():
                        col = "c_" + col
                    elif leading_number.match(col):
                        # Child column: expose the parent too.
                        col_generic = "c_" + col.split(".", 1)[0]
                        col = "c_" + col.replace(".", "_")

                    # setdefault keeps the first line of a file on the same
                    # path as every other line, so its parent column is not
                    # lost.
                    columns = column_map.setdefault(in_file, [])
                    if col_generic is not None and col_generic not in columns:
                        columns.append(col_generic)
                    if col not in columns:
                        columns.append(col)
        except IOError:
            # Best effort: unreadable entries (e.g. sub-directories) are
            # skipped.
            continue

    return column_map
def table_columns(table, groupid):
    """Return the normalised column names of one event definition file.

    Reads ``<analytics_lib>/events/<groupid>/<table>``; the text before the
    first comma of each line is the column name. All-digit names get a
    ``c_`` prefix (``"5"`` -> ``"c_5"``); names that start with a number
    get the prefix and have dots replaced by underscores
    (``"5.1"`` -> ``"c_5_1"``); other names are kept unchanged.

    Args:
        table: File name of the event definition inside the group directory.
        groupid: Name of the group directory (a string).

    Returns:
        list of column names, one per line of the file, in file order
        (duplicates are kept).

    Raises:
        IOError: If the definition file cannot be opened.
    """
    definition_path = (utils.get_analytics_lib() + "/events/" + groupid +
                       "/" + table)
    # Compiled once instead of once per line.
    leading_number = re.compile(r'\d+(\.\d+)?')

    col_list = []
    # The context manager closes the file even if reading fails part-way.
    with open(definition_path, 'r') as definition_file:
        for line in definition_file:
            col = line.strip().split(",", 1)[0]
            if col.isdigit():
                col = "c_" + col
            elif leading_number.match(col):
                col = "c_" + col.replace(".", "_")
            col_list.append(col.strip())
    return col_list
def get_parameter_definition(parameter, groupid):
    """Look up the function and alias defined for an ``event.column`` parameter.

    The definition file ``<analytics_lib>/events/<groupid>/<event>`` holds
    one comma separated record per line:
    ``column_name,column_alias,column_func[,column_type]``. The first record
    whose column name matches (case-insensitively) wins.

    A definition name matches the requested column when:

    * it is all digits and the column is ``c_<name>``, or
    * it starts with a number and the column is either the raw name or the
      normalised ``c_<name with dots as underscores>`` form, or
    * otherwise, when it equals the column.

    Args:
        parameter: String of the form ``"<event>.<column>"``; only the first
            dot separates event and column.
        groupid: Group identifier; converted with ``str()`` and stripped.

    Returns:
        Tuple ``(func, alias)``. ``func`` is ``""`` when no record matches
        or none is defined; ``alias`` falls back to the column name.

    Raises:
        ValueError: If ``parameter`` contains no dot.
        IOError: If the definition file cannot be opened.
    """
    ret_func = ""
    ret_alias = ""
    # maxsplit=1 yields exactly the two pieces unpacked here; a larger
    # maxsplit raises on any column that itself contains a dot.
    event_name, column = parameter.split('.', 1)
    wanted = column.upper()

    definition_path = (utils.get_analytics_lib() + "/events/" +
                       str(groupid).strip() + "/" + str(event_name).strip())
    leading_number = re.compile(r'\d+(\.\d+)?')

    # The context manager closes the file on every exit path.
    with open(definition_path, 'r') as definition_file:
        for line in definition_file:
            if not line.strip():
                continue  # tolerate blank lines

            # At most four fields; extra commas stay in the (unused) last
            # field and missing trailing fields are treated as empty.
            fields = line.rstrip('\r\n').split(',', 3)
            col_name = fields[0]
            col_alias = fields[1] if len(fields) > 1 else ''
            col_func = fields[2] if len(fields) > 2 else ''

            if col_name.isdigit():
                # Numeric names are only addressable with the 'C_' prefix.
                candidates = ("C_" + col_name,)
            elif leading_number.match(col_name):
                # Accept the raw name as well as the prefixed, dot-free
                # form that the column listings expose for such names.
                candidates = (col_name.upper(),
                              "C_" + col_name.replace(".", "_").upper())
            else:
                candidates = (col_name.upper(),)

            if wanted in candidates:
                ret_func = col_func.strip()
                ret_alias = col_alias.strip()
                break

    # Without an alias in the definition, the column name is the alias.
    if ret_alias == '':
        ret_alias = column
    return ret_func, ret_alias