def largeAdd(d, query, collection, contentColNums, timeColInds, timeColNames,
             timeColNameInds, timeColNameDivisions, timeColNamePhrases,
             overallDate, overallDateFormat, timeFormatter,
             reverseTimeFormatter, dateDivisions, datePhrases, mindate,
             maxdate, overallLocation, spaceColNames, spaceColInds, subColInd,
             valueProcessors):
    """Fill slice metadata into `d` using `find_one` / `distinct` queries.

    Instead of iterating over every matching record, this variant asks the
    collection which columns exist for `query` and pulls distinct values per
    column.  It writes `columnNames`, `dimension`, `beginDate`, `endDate`,
    `dateDivisions`, `datePhrases` and `sliceContents` into `d`, plus
    `dateFormat` when `overallDateFormat` is truthy, and the `spatial*` keys
    and `commonLocation` when spatial values are present.

    Args:
        d: dict that receives the computed keys; it is also the return value.
        query: query dict passed to `collection.find` / `find_one`.
        collection: object exposing `columns`, `find` and `find_one`.
        contentColNums: keys whose distinct values make up `sliceContents`.
        timeColInds: column indices whose values are dates.
        timeColNames, timeColNameInds, timeColNameDivisions,
        timeColNamePhrases: parallel sequences describing columns whose
            *names* are dates; `timeColNameInds[k]` is the column index of
            entry `k`.
        overallDate, timeFormatter, reverseTimeFormatter: when `overallDate`
            is truthy each date value is rewritten as
            `timeFormatter(overallDate + reverseTimeFormatter(value))`.
        overallDateFormat: stored as `d['dateFormat']` when truthy.
        dateDivisions, datePhrases: initial lists; copied, never mutated.
        mindate, maxdate: initial bounds fed to `td.makemin` / `td.makemax`.
        overallLocation: location combined with each spatial column value.
        spaceColNames: spatial values contributed by column names.
        spaceColInds: column indices whose values are locations.
        subColInd: unused here.
        valueProcessors: dict of per-column processors for `translate_list`.

    Returns:
        The same dict `d`.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source -- confirm against version control.
    """
    # Copy so that `+=` below cannot extend lists owned by the caller.
    dateDivisions = list(dateDivisions)
    datePhrases = list(datePhrases)

    print('0')  # numbered progress markers; same output as `print '0'`

    # Discover which columns occur in the slice: probe one column at a time
    # and strike every column found in the returned record off the to-do list.
    exists = []
    seen = set()
    check = list(range(len(collection.columns)))
    while check:
        k = check.pop(0)
        probe = dict(query)
        probe[str(k)] = {'$exists': True}
        rec = collection.find_one(probe)
        if rec:
            rec.pop('_id')
            new = [int(key) for key in rec]
            check = sorted(set(check).difference(new))
            # A record may repeat columns found by an earlier probe; record
            # each position once so `dimension` is not inflated.
            for pos in new:
                if pos not in seen:
                    seen.add(pos)
                    exists.append((pos, collection.columns[pos]))

    print('1')
    exists = [e for e in exists if e[1] not in SPECIAL_KEYS]
    if exists:
        (colnums, colnames) = zip(*exists)
    else:
        # zip(*[]) cannot be unpacked into two names.
        (colnums, colnames) = ((), ())
    present = set(colnums)

    d['columnNames'] = colnames
    d['dimension'] = len(colnames)

    # time/date
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat

    if timeColInds:
        dateColVals = ListUnion([collection.find(query).distinct(str(t))
                                 for t in timeColInds if t in present])
        if overallDate:
            dateColVals = [timeFormatter(overallDate + reverseTimeFormatter(val))
                           for val in dateColVals]
        # min()/max() raise on an empty sequence, so skip when no time
        # column is present in this slice.
        if dateColVals:
            dateDivisions += uniqify(ListUnion([td.getLowest(val) for val in dateColVals]))
            datePhrases += uniqify([td.phrase(val) for val in dateColVals])
            mindate = td.makemin(mindate, min(dateColVals))
            maxdate = td.makemax(maxdate, max(dateColVals))

    if timeColNameInds:
        # Select on the column index j (colnums holds column indices), not on
        # the enumerate position k.
        K = [k for (k, j) in enumerate(timeColNameInds) if j in present]
        if K:
            nameDates = [timeColNames[k] for k in K]
            dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
            mindate = td.makemin(mindate, min(nameDates))
            maxdate = td.makemax(maxdate, max(nameDates))
            datePhrases += [timeColNamePhrases[k] for k in K]

    d['beginDate'] = td.convertToDT(mindate)
    d['endDate'] = td.convertToDT(maxdate, convertMode='High')
    d['dateDivisions'] = uniqify(dateDivisions)
    d['datePhrases'] = uniqify(datePhrases)

    print('2')
    # space/location
    if spaceColInds:
        spaceColVals = ListUnion([collection.find(query).distinct(str(t))
                                  for t in spaceColInds if t in present])
        spaceColVals = [loc.integrate(overallLocation, scv) for scv in spaceColVals]
    else:
        spaceColVals = []
    spaceVals = spaceColNames + spaceColVals

    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion([loc.divisions(sv) for sv in spaceVals]))
        d['spatialDivisionsTight'] = uniqify(ListUnion([loc.divisions2(sv) for sv in spaceVals]))
        d['spatialPhrases'] = uniqify([loc.phrase(sv) for sv in spaceVals])
        d['spatialPhrasesTight'] = uniqify([loc.phrase2(sv) for sv in spaceVals])
        # Narrow the overall location by every spatial value; stop as soon as
        # nothing is left in common.
        commonLocation = overallLocation
        for sv in spaceVals:
            commonLocation = loc.intersect(commonLocation, sv)
            if not commonLocation:
                break
        if commonLocation:
            d['commonLocation'] = loc.phrase(commonLocation)

    print('3')
    d['sliceContents'] = ' '.join(uniqify(ListUnion([
        translate_list(valueProcessors.get(x, None),
                       [decode_obj(val) for val in collection.find(query).distinct(x)])
        for x in contentColNums])))

    return d
def smallAdd(d, query, collection, contentColNums, timeColInds, timeColNames,
             timeColNameInds, timeColNameDivisions, timeColNamePhrases,
             overallDate, overallDateFormat, timeFormatter,
             reverseTimeFormatter, dateDivisions, datePhrases, mindate,
             maxdate, overallLocation, spaceColNames, spaceColInds, subColInd,
             valueProcessors, slicecount):
    """Fill slice metadata into `d` by iterating over every matching record.

    Walks the cursor returned by `collection.find(query, timeout=False)` once
    and gathers content text, column keys, date values and locations record
    by record.  It writes `sliceContents`, `columnNames`, `dimension`,
    `beginDate`, `endDate`, `dateDivisions` and `datePhrases` into `d`, plus
    `dateFormat` when `overallDateFormat` is truthy and the `spatial*` keys
    when any spatial value was collected.  `d['volume']` must already be set:
    `datePhrases` is de-duplicated only when it is 10000 or more.

    Args:
        d: dict that receives the computed keys; it is also the return value.
        query: query dict passed to `collection.find`.
        collection: object exposing `columns`, `columnGroups` and `find`.
        contentColNums: dotted key paths whose values form `sliceContents`.
        timeColInds: column indices whose values are dates.
        timeColNames, timeColNameInds, timeColNameDivisions,
        timeColNamePhrases: parallel sequences describing columns whose
            *names* are dates; `timeColNameInds[k]` is the column index of
            entry `k`.
        overallDate, timeFormatter, reverseTimeFormatter: when `overallDate`
            is truthy each date value is rewritten as
            `timeFormatter(overallDate + reverseTimeFormatter(value))`.
        overallDateFormat: stored as `d['dateFormat']` when truthy.
        dateDivisions, datePhrases: initial lists; copied, never mutated.
        mindate, maxdate: initial bounds fed to `td.makemin` / `td.makemax`.
        overallLocation: location combined with each spatial column value.
        spaceColNames: spatial values contributed by column names; copied.
        spaceColInds: column indices whose values are locations.
        subColInd: unused.
        valueProcessors: dict of per-column processors for `translate`.
        slicecount: unused.

    Returns:
        The same dict `d`.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source -- confirm against version control.
    """
    # Copy the list arguments: they are extended below and must not leak
    # values from this slice back into the caller's lists.
    dateDivisions = list(dateDivisions)
    datePhrases = list(datePhrases)
    spaceVals = list(spaceColNames)

    R = collection.find(query, timeout=False)
    colnames = []
    contents = []

    # NOTE(review): commonLocation is computed here and in the record loop
    # but never written to `d`, whereas largeAdd stores
    # d['commonLocation'] -- confirm whether it should be stored here too.
    commonLocation = overallLocation
    for sv in spaceColNames:
        commonLocation = loc.intersect(commonLocation, sv)
        if not commonLocation:
            break

    for r in R:
        cells = []
        for x in contentColNums:
            if rhasattr(r, x.split('.')):
                cells.append(translate(valueProcessors.get(x, None),
                                       decode_obj(rgetattr(r, x.split('.')))))
            else:
                cells.append('')
        contents.append(' '.join(cells))

        colnames = uniqify(colnames + list(r.keys()))

        if timeColInds:
            for x in timeColInds:
                key = str(x)
                if key in r:
                    when = r[key]
                    if overallDate:
                        when = timeFormatter(overallDate + reverseTimeFormatter(when))
                    dateDivisions += td.getLowest(when)
                    datePhrases.append(td.phrase(when))
                    mindate = td.makemin(mindate, when)
                    maxdate = td.makemax(maxdate, when)

        if spaceColInds:
            for x in spaceColInds:
                key = str(x)
                if key in r:
                    location = loc.integrate(overallLocation, r[key])
                    if commonLocation is not None:
                        commonLocation = loc.intersect(commonLocation, r[key])
                    spaceVals.append(location)

    d['sliceContents'] = ' '.join(contents)
    # Record keys that are digit strings are column indices; other keys
    # (e.g. '_id') are skipped.
    d['columnNames'] = [collection.columns[int(x)] for x in colnames if x.isdigit()]
    d['dimension'] = len(d['columnNames'])

    # time/date
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat

    if 'timeColNames' in collection.columnGroups:
        present = set(colnames)
        K = [k for (k, j) in enumerate(timeColNameInds) if str(j) in present]
        # min()/max() raise on an empty sequence, so skip when none of the
        # date-named columns occurs in this slice.
        if K:
            nameDates = [timeColNames[k] for k in K]
            dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
            mindate = td.makemin(mindate, min(nameDates))
            maxdate = td.makemax(maxdate, max(nameDates))
            datePhrases += uniqify([timeColNamePhrases[k] for k in K])

    d['beginDate'] = td.convertToDT(mindate)
    d['endDate'] = td.convertToDT(maxdate, convertMode='High')
    d['dateDivisions'] = uniqify(dateDivisions)
    d['datePhrases'] = datePhrases if d['volume'] < 10000 else uniqify(datePhrases)

    # space/location
    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion([loc.divisions(sv) for sv in spaceVals]))
        d['spatialDivisionsTight'] = uniqify(ListUnion([loc.divisions2(sv) for sv in spaceVals]))
        d['spatialPhrases'] = uniqify([loc.phrase(sv) for sv in spaceVals])
        d['spatialPhrasesTight'] = uniqify([loc.phrase2(sv) for sv in spaceVals])

    return d