def getCommonDatesLocations(iterator, metadata, times, locations, dimensions, k):
    """Fill in common date and location metadata for chunk k.

    Computes begin/end dates and date divisions from the time-column names and
    the recorded times, then spatial divisions and the common location from the
    recorded locations.
    """
    vNInd = '0'

    overallDateFormat = iterator.overallDateFormat if hasattr(iterator, 'overallDateFormat') else ''
    dateFormat = iterator.dateFormat if hasattr(iterator, 'dateFormat') else ''
    overallDate = iterator.overallDate if hasattr(iterator, 'overallDate') else ''

    if overallDateFormat or dateFormat:
        DF = overallDateFormat + dateFormat
        F = td.mongotimeformatter(DF)
        # time-column names that appear among this chunk's dimensions
        T1 = [F(overallDate + x) for x in iterator.columnGroups['timeColNames'] if x in dimensions[k]]
        if overallDateFormat:
            # prepend the overall date to each recorded time before reformatting
            reverseF = td.reverse(dateFormat)
            T2 = [F(overallDate + y) for y in map(reverseF, times[k])]
        else:
            T2 = times[k]
        mindate = min(T1 + T2)
        maxdate = max(T1 + T2)
        divisions = uniqify(ListUnion([td.getLowest(t) for t in T1 + T2]))
        metadata[k]['beginDate'] = mindate
        metadata[k]['endDate'] = maxdate
        metadata[k]['dateDivisions'] = divisions

    # locations
    if locations[k]:
        if hasattr(iterator, 'overallLocation'):
            locs = [loc.integrate(iterator.overallLocation, l) for l in locations[k]]
        else:
            locs = locations[k]
        locs = locListUniqify(locs)
        metadata[k]['spatialDivisions'] = uniqify(ListUnion([loc.divisions(x) for x in locs]))
        metadata[k]['commonLocation'] = reduce(loc.intersect, locs)
def initialize_argdict(collection):
    """Build the static metadata dict d and the per-slice argument dict ArgDict
    for a collection: content columns, date/time formatting, locations, and
    source information."""
    d = {}
    ArgDict = {}

    # Expand slice/content column specs: a name whose root is a real column is
    # kept as-is; otherwise it is treated as a column group and expanded.
    sliceCols = uniqify(Flatten(collection.sliceCols))
    sliceColList = ListUnion([[x] if x.split('.')[0] in collection.columns
                              else collection.columnGroups.get(x, []) for x in sliceCols])
    if hasattr(collection, 'contentCols'):
        contentColList = ListUnion([[x] if x.split('.')[0] in collection.columns
                                    else collection.columnGroups.get(x, []) for x in collection.contentCols])
        contentCols = uniqify(contentColList + sliceColList)
    else:
        contentCols = sliceColList
    contentColNums = getStrs(collection, contentCols)
    ArgDict['contentColNums'] = contentColNums

    # per-record date format
    if hasattr(collection, 'dateFormat'):
        dateFormat = collection.dateFormat
        ArgDict['overallDateFormat'] = dateFormat
        timeFormatter = td.mongotimeformatter(dateFormat)
        ArgDict['timeFormatter'] = timeFormatter
    else:
        dateFormat = ''

    # overall date shared by the whole collection
    if hasattr(collection, 'overallDate'):
        od = collection.overallDate['date']
        odf = collection.overallDate['format']
        ArgDict['overallDate'] = od
        overallDateFormat = odf + dateFormat
        ArgDict['overallDateFormat'] = overallDateFormat
        timeFormatter = td.mongotimeformatter(overallDateFormat)
        ArgDict['timeFormatter'] = timeFormatter
        # pad the overall date with wildcards covering the per-record format
        OD = timeFormatter(od + 'X' * len(dateFormat))
        ArgDict['dateDivisions'] = td.getLowest(OD)
        ArgDict['datePhrases'] = [td.phrase(OD)]
        ArgDict['mindate'] = OD
        ArgDict['maxdate'] = OD
        if dateFormat:
            reverseTimeFormatter = td.reverse(dateFormat)
            ArgDict['reverseTimeFormatter'] = reverseTimeFormatter
    else:
        od = ''

    if 'timeColNames' in collection.columnGroups.keys():
        timeColNamesInd = getNums(collection, collection.columnGroups['timeColNames'])
        tcs = [timeFormatter(od + t) for t in collection.columnGroups['timeColNames']]
        ArgDict['timeColNames'] = tcs
        ArgDict['timeColNameInds'] = timeColNamesInd
        ArgDict['timeColNameDivisions'] = [[td.TIME_DIVISIONS[x] for x in td.getLowest(tc)] for tc in tcs]
        ArgDict['timeColNamePhrases'] = [td.phrase(t) for t in tcs]

    if 'timeColumns' in collection.columnGroups.keys():
        ArgDict['timeColInds'] = getNums(collection, collection.columnGroups['timeColumns'])

    # overall location
    if hasattr(collection, 'overallLocation'):
        ol = collection.overallLocation
        ArgDict['overallLocation'] = ol
    else:
        ol = None

    # get divisions and phrases from overallLocation and spaceColNames
    if 'spaceColNames' in collection.columnGroups.keys():
        spaceColNames = collection.columnGroups['spaceColNames']
        ArgDict['spaceColNames'] = [loc.integrate(ol, x) for x in spaceColNames]

    if 'spaceColumns' in collection.columnGroups.keys():
        ArgDict['spaceColInds'] = getNums(collection, collection.columnGroups['spaceColumns'])

    # source metadata
    Source = collection.source
    SourceNameDict = son.SON([(k, Source[k]['name'] if isinstance(Source[k], dict) else Source[k])
                              for k in Source.keys()])
    SourceAbbrevDict = dict([(k, Source[k]['shortName']) for k in Source.keys()
                             if isinstance(Source[k], dict) and 'shortName' in Source[k].keys()])
    d['sourceSpec'] = json.dumps(SourceNameDict, default=ju.default)
    d['agency'] = SourceNameDict['agency']
    d['subagency'] = SourceNameDict['subagency']
    d['dataset'] = SourceNameDict['dataset']
    for k in SourceNameDict.keys():
        d['source_' + str(k).lower()] = SourceNameDict[k]
    for k in SourceAbbrevDict.keys():
        d['source_' + str(k).lower() + '_acronym'] = SourceAbbrevDict[k]
    d['source'] = ' '.join(SourceNameDict.values() + SourceAbbrevDict.values())

    if 'subcollections' in collection.columns:
        ArgDict['subColInd'] = collection.columns.index('subcollections')

    # value processors applied to cell values
    value_processor_instructions = stringifyDictElements(collection.valueProcessors)
    vpcontext = commonjs.translatorContext(value_processor_instructions)
    ArgDict['valueProcessors'], ArgDict['valueProcessorsKey'] = get_processors(value_processor_instructions,
                                                                               collection, vpcontext, commonjs.js_call)

    return d, ArgDict
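
# Illustrative only: the column-expansion rule applied to sliceCols/contentCols
# in initialize_argdict above keeps a name whose root (the part before '.') is a
# real column, and otherwise expands it as a column group.  The helper and the
# sample columns/groups below are made up for illustration and assume ListUnion
# simply concatenates its argument lists, as its uses in this module suggest.
def _expand_cols_sketch(names, columns, columnGroups):
    return ListUnion([[x] if x.split('.')[0] in columns else columnGroups.get(x, [])
                      for x in names])

# e.g. _expand_cols_sketch(['Agency', 'fiscal'], ['Agency', 'Year'], {'fiscal': ['Year']})
#      -> ['Agency', 'Year']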
def largeAdd(d, query, collection, contentColNums, timeColInds, timeColNames,
             timeColNameInds, timeColNameDivisions, timeColNamePhrases, overallDate,
             overallDateFormat, timeFormatter, reverseTimeFormatter, dateDivisions,
             datePhrases, mindate, maxdate, overallLocation, spaceColNames,
             spaceColInds, subColInd, valueProcessors):
    """Attach metadata for a large slice to d.

    Instead of iterating every record, this discovers which columns are populated
    via $exists probes and then summarizes dates, locations, and contents with
    distinct() queries.
    """
    print '0'
    # Probe which columns actually occur in this slice: repeatedly ask for a
    # record in which a not-yet-seen column index exists, and mark every column
    # present in the returned record as seen.
    exists = []
    check = range(len(collection.columns))
    while check:
        k = check.pop(0)
        rec = collection.find_one(dict(query.items() + [(str(k), {'$exists': True})]))
        if rec:
            rec.pop('_id')
            new = map(int, rec.keys())
            check = list(set(check).difference(new))
            check.sort()
            exists += [(pos, collection.columns[pos]) for pos in new]

    print '1'
    exists = [e for e in exists if e[1] not in SPECIAL_KEYS]
    colnums, colnames = zip(*exists)
    d['columnNames'] = colnames
    d['dimension'] = len(d['columnNames'])

    # time/date metadata
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat
    if timeColInds:
        dateColVals = ListUnion([collection.find(query).distinct(str(t)) for t in timeColInds if t in colnums])
        if overallDate:
            dateColVals = [timeFormatter(overallDate + reverseTimeFormatter(time)) for time in dateColVals]
        dateDivisions += uniqify(ListUnion(map(td.getLowest, dateColVals)))
        datePhrases += uniqify(map(td.phrase, dateColVals))
        mindate = td.makemin(mindate, min(dateColVals))
        maxdate = td.makemax(maxdate, max(dateColVals))
    if timeColNameInds:
        # keep the time-column names whose column number occurs in this slice
        K = [k for (k, j) in enumerate(timeColNameInds) if j in colnums]
        dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
        mindate = td.makemin(mindate, min([timeColNames[k] for k in K]))
        maxdate = td.makemax(maxdate, max([timeColNames[k] for k in K]))
        datePhrases += [timeColNamePhrases[k] for k in K]
    dateDivisions = uniqify(dateDivisions)
    datePhrases = uniqify(datePhrases)
    d['beginDate'] = td.convertToDT(mindate)
    d['endDate'] = td.convertToDT(maxdate, convertMode='High')
    d['dateDivisions'] = dateDivisions
    d['datePhrases'] = datePhrases

    print '2'
    # spatial metadata
    if spaceColInds:
        spaceColVals = ListUnion([collection.find(query).distinct(str(t)) for t in spaceColInds if t in colnums])
        spaceColVals = [loc.integrate(overallLocation, scv) for scv in spaceColVals]
    else:
        spaceColVals = []
    spaceVals = spaceColNames + spaceColVals
    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion(map(loc.divisions, spaceVals)))
        d['spatialDivisionsTight'] = uniqify(ListUnion(map(loc.divisions2, spaceVals)))
        d['spatialPhrases'] = uniqify(map(loc.phrase, spaceVals))
        d['spatialPhrasesTight'] = uniqify(map(loc.phrase2, spaceVals))
        commonLocation = overallLocation
        for sv in spaceVals:
            commonLocation = loc.intersect(commonLocation, sv)
            if not commonLocation:
                break
        if commonLocation:
            d['commonLocation'] = loc.phrase(commonLocation)

    print '3'
    # concatenated, deduplicated contents of the content columns
    d['sliceContents'] = ' '.join(uniqify(ListUnion([translate_list(valueProcessors.get(x, None),
                                                                    map(decode_obj, collection.find(query).distinct(x)))
                                                     for x in contentColNums])))
    return d
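
# A minimal, self-contained sketch of the $exists column-probing loop used at the
# top of largeAdd: probe one not-yet-seen column index at a time and mark every
# column present in the returned record as seen.  _FakeCollection and the sample
# records are stand-ins invented for this example (the SPECIAL_KEYS filtering
# step is omitted); the real loop runs against a pymongo collection.
class _FakeCollection(object):
    columns = ['Agency', 'Year', 'Amount']

    def __init__(self, records):
        self.records = records

    def find_one(self, q):
        wanted = [int(k) for k in q if isinstance(k, str) and k.isdigit()]
        for r in self.records:
            if all(str(k) in r for k in wanted):
                return dict(r, _id=1)
        return None

def _probe_columns_sketch(collection, query):
    exists = []
    check = list(range(len(collection.columns)))
    while check:
        k = check.pop(0)
        rec = collection.find_one(dict(list(query.items()) + [(str(k), {'$exists': True})]))
        if rec:
            rec.pop('_id')
            new = list(map(int, rec.keys()))
            check = sorted(set(check).difference(new))
            exists += [(pos, collection.columns[pos]) for pos in new]
    return exists

# e.g. _probe_columns_sketch(_FakeCollection([{'0': 'EPA', '2': 12.5}]), {})
#      -> [(0, 'Agency'), (2, 'Amount')]   (column 1 is never populated)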
def smallAdd(d, query, collection, contentColNums, timeColInds, timeColNames,
             timeColNameInds, timeColNameDivisions, timeColNamePhrases, overallDate,
             overallDateFormat, timeFormatter, reverseTimeFormatter, dateDivisions,
             datePhrases, mindate, maxdate, overallLocation, spaceColNames,
             spaceColInds, subColInd, valueProcessors, slicecount):
    """Attach metadata for a small slice to d by iterating every record returned
    by the query and accumulating contents, column names, dates, and locations
    record by record."""
    R = collection.find(query, timeout=False)

    colnames = []
    d['sliceContents'] = []
    Subcollections = []
    spaceVals = spaceColNames[:]    # copy so per-record appends don't mutate the shared list
    commonLocation = overallLocation
    for sv in spaceColNames:
        commonLocation = loc.intersect(commonLocation, sv)
        if not commonLocation:
            break

    for (i, r) in enumerate(R):
        d['sliceContents'].append(' '.join([translate(valueProcessors.get(x, None),
                                                      decode_obj(rgetattr(r, x.split('.'))))
                                            if rhasattr(r, x.split('.')) else '' for x in contentColNums]))
        colnames = uniqify(colnames + r.keys())
        if subColInd:
            Subcollections += r[str(subColInd)]
        if timeColInds:
            for x in timeColInds:
                if str(x) in r.keys():
                    time = r[str(x)]
                    if overallDate:
                        time = timeFormatter(overallDate + reverseTimeFormatter(time))
                    dateDivisions += td.getLowest(time)
                    datePhrases.append(td.phrase(time))
                    mindate = td.makemin(mindate, time)
                    maxdate = td.makemax(maxdate, time)
        if spaceColInds:
            for x in spaceColInds:
                if str(x) in r.keys():
                    location = loc.integrate(overallLocation, r[str(x)])
                    commonLocation = loc.intersect(commonLocation, r[str(x)]) if commonLocation != None else None
                    spaceVals.append(location)

    d['sliceContents'] = ' '.join(d['sliceContents'])
    Subcollections = uniqify(Subcollections)
    d['columnNames'] = [collection.columns[int(x)] for x in colnames if x.isdigit()]
    d['dimension'] = len(d['columnNames'])

    # time/date
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat
    if 'timeColNames' in collection.columnGroups.keys():
        K = [k for (k, j) in enumerate(timeColNameInds) if str(j) in colnames]
        dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
        mindate = td.makemin(mindate, min([timeColNames[k] for k in K]))
        maxdate = td.makemax(maxdate, max([timeColNames[k] for k in K]))
        datePhrases += uniqify([timeColNamePhrases[k] for k in K])
    d['beginDate'] = td.convertToDT(mindate)
    d['endDate'] = td.convertToDT(maxdate, convertMode='High')
    d['dateDivisions'] = uniqify(dateDivisions)
    d['datePhrases'] = datePhrases if d['volume'] < 10000 else uniqify(datePhrases)

    # spatial metadata
    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion(map(loc.divisions, spaceVals)))
        d['spatialDivisionsTight'] = uniqify(ListUnion(map(loc.divisions2, spaceVals)))
        d['spatialPhrases'] = uniqify(map(loc.phrase, spaceVals))
        d['spatialPhrasesTight'] = uniqify(map(loc.phrase2, spaceVals))

    return d
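
# The helpers uniqify and ListUnion are used throughout this module but defined
# elsewhere in the repository.  The sketches below are illustrative
# reimplementations under the assumption that uniqify deduplicates while
# preserving order and ListUnion concatenates a list of lists; they are not the
# actual helpers and should not replace the imported ones.
def _uniqify_sketch(seq):
    seen = set()
    out = []
    for x in seq:
        if x not in seen:
            seen.add(x)
            out.append(x)
    return out

def _ListUnion_sketch(list_of_lists):
    out = []
    for l in list_of_lists:
        out.extend(l)
    return out

# e.g. _uniqify_sketch(_ListUnion_sketch([[1, 2], [2, 3]])) == [1, 2, 3]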