Example #1
0
def getCommonDatesLocations(iterator,metadata,times,locations,dimensions,k):
    """Write the date range and spatial summary for key ``k`` into ``metadata[k]``.

    ``metadata[k]`` is mutated in place; nothing is returned.

    Dates: when ``iterator`` has a non-empty ``overallDateFormat`` or
    ``dateFormat`` attribute, the time-column names that appear in
    ``dimensions[k]`` are combined with the values in ``times[k]`` and
    ``beginDate``, ``endDate`` and ``dateDivisions`` are set.  If no dates
    are found, those keys are left untouched.

    Locations: when ``locations[k]`` is non-empty, ``spatialDivisions`` and
    ``commonLocation`` are set.  If ``iterator`` has an ``overallLocation``
    attribute, each location is first merged with it via ``loc.integrate``.
    """
    # `reduce` is a builtin only on Python 2; this import works on 2.6+ and 3.
    from functools import reduce

    overallDateFormat = getattr(iterator,'overallDateFormat','')
    dateFormat = getattr(iterator,'dateFormat','')
    overallDate = getattr(iterator,'overallDate','')

    if overallDateFormat or dateFormat:
        formatter = td.mongotimeformatter(overallDateFormat + dateFormat)
        # The time-column group is optional; treat a missing group as empty
        # instead of raising KeyError.
        timeColNames = iterator.columnGroups.get('timeColNames',[])
        T1 = [formatter(overallDate + x) for x in timeColNames if x in dimensions[k]]
        if overallDateFormat:
            # Stored values are turned back into strings with the reverse of
            # dateFormat, prefixed with the overall date and re-parsed with
            # the combined format.
            reverseF = td.reverse(dateFormat)
            T2 = [formatter(overallDate + reverseF(t)) for t in times[k]]
        else:
            T2 = list(times[k])
        dates = T1 + T2
        # min()/max() raise ValueError on an empty sequence, so only record a
        # date range when at least one date exists.
        if dates:
            metadata[k]['beginDate'] = min(dates)
            metadata[k]['endDate'] = max(dates)
            metadata[k]['dateDivisions'] = uniqify(ListUnion([td.getLowest(t) for t in dates]))

    #locations
    if locations[k]:
        if hasattr(iterator,'overallLocation'):
            locs = [loc.integrate(iterator.overallLocation,l) for l in locations[k]]
        else:
            locs = locations[k]
        locs = locListUniqify(locs)
        metadata[k]['spatialDivisions'] = uniqify(ListUnion([loc.divisions(x) for x in locs]))
        metadata[k]['commonLocation'] = reduce(loc.intersect,locs)
Example #2
0
def initialize_argdict(collection):
    """Precompute the per-collection values shared by every slice.

    Reads the column, date, location, source and value-processor settings of
    ``collection`` and splits them into two dictionaries.

    Returns:
        A ``(d, ArgDict)`` tuple.

        ``d`` holds the source description: ``sourceSpec`` (JSON),
        ``agency``, ``subagency``, ``dataset``, one ``source_<key>`` entry per
        source key, one ``source_<key>_acronym`` entry per source key that
        has a ``shortName``, and ``source`` (all names and acronyms joined by
        spaces).

        ``ArgDict`` holds column indices, date and location helpers and value
        processors.  Most of its keys share their names with parameters of
        ``largeAdd`` and ``smallAdd``.

    Raises:
        ValueError: if the collection has a ``timeColNames`` column group but
            neither ``dateFormat`` nor ``overallDate``, so there is no
            formatter to parse the column names with.
        KeyError: if the source has no ``agency``, ``subagency`` or
            ``dataset`` entry.
    """
    d = {}
    ArgDict = {}
    columnGroups = collection.columnGroups

    def expandColumns(names):
        # A name whose first dotted component is an actual column stands for
        # itself; any other name is looked up as a column group (an unknown
        # group expands to nothing).
        return ListUnion([[x] if x.split('.')[0] in collection.columns else columnGroups.get(x,[]) for x in names])

    sliceColList = expandColumns(uniqify(Flatten(collection.sliceCols)))

    if hasattr(collection,'contentCols'):
        contentCols = uniqify(expandColumns(collection.contentCols) + sliceColList)
    else:
        contentCols = sliceColList
    ArgDict['contentColNums'] = getStrs(collection,contentCols)

    # Stays None when the collection defines no date information at all.
    timeFormatter = None

    if hasattr(collection,'dateFormat'):
        dateFormat = collection.dateFormat
        ArgDict['overallDateFormat'] = dateFormat
        timeFormatter = td.mongotimeformatter(dateFormat)
        ArgDict['timeFormatter'] = timeFormatter
    else:
        dateFormat = ''

    if hasattr(collection,'overallDate'):
        od = collection.overallDate['date']
        odf = collection.overallDate['format']
        ArgDict['overallDate'] = od
        # The overall date's format is prepended to the per-value format, and
        # the combined formatter replaces the one built above.
        overallDateFormat = odf + dateFormat
        ArgDict['overallDateFormat'] = overallDateFormat
        timeFormatter = td.mongotimeformatter(overallDateFormat)
        ArgDict['timeFormatter'] = timeFormatter

        # Fixed: this used the undefined name `overallDate`; the value read
        # from the collection is `od`.  The per-value part of the format is
        # padded with one 'X' per format character.
        # NOTE(review): presumably 'X' is td's "unspecified" placeholder - confirm.
        OD = timeFormatter(od + 'X'*len(dateFormat))
        ArgDict['dateDivisions'] = td.getLowest(OD)
        ArgDict['datePhrases'] = [td.phrase(OD)]
        ArgDict['mindate'] = OD
        ArgDict['maxdate'] = OD

        if dateFormat:
            ArgDict['reverseTimeFormatter'] = td.reverse(dateFormat)
    else:
        od = ''

    if 'timeColNames' in columnGroups:
        if timeFormatter is None:
            # Previously this surfaced as an UnboundLocalError on timeFormatter.
            raise ValueError("collection has a 'timeColNames' column group but neither 'dateFormat' nor 'overallDate'")
        timeColNames = columnGroups['timeColNames']
        tcs = [timeFormatter(od + t) for t in timeColNames]
        ArgDict['timeColNames'] = tcs
        ArgDict['timeColNameInds'] = getNums(collection,timeColNames)
        ArgDict['timeColNameDivisions'] = [[td.TIME_DIVISIONS[x] for x in td.getLowest(tc)] for tc in tcs]
        ArgDict['timeColNamePhrases'] = [td.phrase(t) for t in tcs]

    if 'timeColumns' in columnGroups:
        ArgDict['timeColInds'] = getNums(collection,columnGroups['timeColumns'])

    #overall location
    if hasattr(collection,'overallLocation'):
        ol = collection.overallLocation
        ArgDict['overallLocation'] = ol
    else:
        ol = None

    #get divisions and phrases from OverallLocation and SpaceColNames
    if 'spaceColNames' in columnGroups:
        ArgDict['spaceColNames'] = [loc.integrate(ol,x) for x in columnGroups['spaceColNames']]

    if 'spaceColumns' in columnGroups:
        ArgDict['spaceColInds'] = getNums(collection,columnGroups['spaceColumns'])

    # A source entry is either a plain value or a dict with a 'name' and an
    # optional 'shortName'.
    Source = collection.source
    SourceNameDict = son.SON([(k,Source[k]['name'] if isinstance(Source[k],dict) else Source[k]) for k in Source.keys()])
    SourceAbbrevDict = dict((k,Source[k]['shortName']) for k in Source.keys() if isinstance(Source[k],dict) and 'shortName' in Source[k])
    d['sourceSpec'] = json.dumps(SourceNameDict,default=ju.default)
    d['agency'] = SourceNameDict['agency']
    d['subagency'] = SourceNameDict['subagency']
    d['dataset'] = SourceNameDict['dataset']
    for k in SourceNameDict.keys():
        d['source_' + str(k).lower()] = SourceNameDict[k]
    for k in SourceAbbrevDict.keys():
        d['source_' + str(k).lower() + '_acronym'] = SourceAbbrevDict[k]
    # list() keeps the concatenation valid where .values() returns a view.
    d['source'] = ' '.join(list(SourceNameDict.values()) + list(SourceAbbrevDict.values()))

    if 'subcollections' in collection.columns:
        ArgDict['subColInd'] = collection.columns.index('subcollections')

    value_processor_instructions = stringifyDictElements(collection.valueProcessors)
    vpcontext = commonjs.translatorContext(value_processor_instructions)
    ArgDict['valueProcessors'],ArgDict['valueProcessorsKey'] = get_processors(value_processor_instructions,collection, vpcontext ,commonjs.js_call)

    return d, ArgDict
Example #3
0
def largeAdd(d,query,collection,contentColNums, timeColInds ,timeColNames , timeColNameInds ,timeColNameDivisions ,timeColNamePhrases ,overallDate , overallDateFormat, timeFormatter ,reverseTimeFormatter ,dateDivisions ,datePhrases ,mindate ,maxdate ,overallLocation , spaceColNames, spaceColInds, subColInd, valueProcessors):
    """Fill ``d`` with metadata for the records matching ``query``, using
    server-side ``distinct`` queries rather than iterating every record.

    Args:
        d: metadata dict to fill; mutated and returned.
        query: query mapping selecting the slice.
        collection: object providing ``columns``, ``find`` and ``find_one``.
        contentColNums: keys whose distinct values make up ``sliceContents``.
        timeColInds: column indices whose values are dates.
        timeColNames, timeColNameInds, timeColNameDivisions,
            timeColNamePhrases: parallel lists describing columns whose
            *names* are dates (parsed value, column index, divisions, phrase).
        overallDate, overallDateFormat, timeFormatter, reverseTimeFormatter:
            date settings; date output is produced only when
            ``overallDateFormat`` is truthy.
        dateDivisions, datePhrases, mindate, maxdate: starting values that
            the slice's own dates are merged into.
        overallLocation: location that every space value is merged with.
        spaceColNames: already-integrated locations from column names.
        spaceColInds: column indices whose values are locations.
        subColInd: accepted for signature parity with ``smallAdd``; unused.
        valueProcessors: mapping from content key to processor, passed to
            ``translate_list``.

    Returns:
        ``d``, with ``columnNames``, ``dimension``, ``sliceContents`` and,
        where applicable, the date and spatial keys set.
    """
    # Work on copies: these lists are extended below, and extending the
    # caller's objects in place would carry one slice's dates into the next
    # call that receives the same lists.
    dateDivisions = list(dateDivisions or [])
    datePhrases = list(datePhrases or [])
    spaceColNames = list(spaceColNames or [])

    print('0')  # progress marker
    # Find which columns occur in the slice: probe for one record holding
    # column k, then strike every column that record holds off the to-do list.
    found = []
    seen = set()
    check = list(range(len(collection.columns)))
    while check:
        k = check.pop(0)
        probe = dict(query)
        probe[str(k)] = {'$exists':True}
        rec = collection.find_one(probe)
        if rec:
            # Only digit keys are column indices; this also skips '_id'.
            new = [int(key) for key in rec if key.isdigit()]
            check = sorted(set(check).difference(new))
            for pos in new:
                # A later probe record usually repeats columns already found;
                # record each column once so `dimension` is not inflated.
                if pos not in seen:
                    seen.add(pos)
                    found.append((pos,collection.columns[pos]))

    print('1')  # progress marker
    found = [e for e in found if e[1] not in SPECIAL_KEYS]
    if found:
        (colnums,colnames) = zip(*found)
    else:
        # zip(*[]) yields nothing to unpack.
        (colnums,colnames) = ((),())

    d['columnNames'] = colnames
    d['dimension'] = len(d['columnNames'])

    if overallDateFormat:
        d['dateFormat'] = overallDateFormat

        if timeColInds:
            dateColVals = ListUnion([collection.find(query).distinct(str(t)) for t in timeColInds if t in colnums])
            if overallDate:
                dateColVals = [timeFormatter(overallDate + reverseTimeFormatter(time)) for time in dateColVals]
            # min()/max() raise on an empty list, so skip when the slice
            # holds no date values.
            if dateColVals:
                dateDivisions += uniqify(ListUnion([td.getLowest(v) for v in dateColVals]))
                datePhrases += uniqify([td.phrase(v) for v in dateColVals])
                mindate = td.makemin(mindate,min(dateColVals))
                maxdate = td.makemax(maxdate,max(dateColVals))

        if timeColNameInds:
            # Fixed: select by the column index j, not the list position k
            # (smallAdd tests the column index as well).
            K = [k for (k,j) in enumerate(timeColNameInds) if j in colnums]
            if K:
                dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
                mindate = td.makemin(mindate,min([timeColNames[k] for k in K]))
                maxdate = td.makemax(maxdate,max([timeColNames[k] for k in K]))
                datePhrases += [timeColNamePhrases[k] for k in K]

        d['beginDate'] = td.convertToDT(mindate)
        d['endDate'] = td.convertToDT(maxdate,convertMode='High')
        d['dateDivisions'] = uniqify(dateDivisions)
        d['datePhrases'] = uniqify(datePhrases)

    print('2')  # progress marker
    if spaceColInds:
        spaceColVals = ListUnion([collection.find(query).distinct(str(t)) for t in spaceColInds if t in colnums])
        spaceColVals = [loc.integrate(overallLocation,scv) for scv in spaceColVals]
    else:
        spaceColVals = []
    spaceVals = spaceColNames + spaceColVals
    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion([loc.divisions(sv) for sv in spaceVals]))
        d['spatialDivisionsTight'] = uniqify(ListUnion([loc.divisions2(sv) for sv in spaceVals]))
        d['spatialPhrases'] = uniqify([loc.phrase(sv) for sv in spaceVals])
        d['spatialPhrasesTight'] = uniqify([loc.phrase2(sv) for sv in spaceVals])
    # Narrow the overall location by every space value; stop as soon as
    # nothing is left in common.
    commonLocation = overallLocation
    for sv in spaceVals:
        commonLocation = loc.intersect(commonLocation,sv)
        if not commonLocation:
            break
    if commonLocation:
        d['commonLocation'] = loc.phrase(commonLocation)

    print('3')  # progress marker

    contents = []
    for x in contentColNums:
        values = [decode_obj(v) for v in collection.find(query).distinct(x)]
        contents.append(translate_list(valueProcessors.get(x,None),values))
    d['sliceContents'] = ' '.join(uniqify(ListUnion(contents)))

    return d
Example #4
0
def smallAdd(d,query,collection,contentColNums, timeColInds ,timeColNames , timeColNameInds ,timeColNameDivisions ,timeColNamePhrases ,overallDate, overallDateFormat, timeFormatter ,reverseTimeFormatter ,dateDivisions ,datePhrases ,mindate ,maxdate ,overallLocation , spaceColNames , spaceColInds ,subColInd, valueProcessors,slicecount):
    """Fill ``d`` with metadata for the records matching ``query`` by
    iterating over every matching record.

    Args:
        d: metadata dict to fill; mutated and returned.  ``d['volume']`` is
            read when ``overallDateFormat`` is truthy.
        query: query mapping selecting the slice.
        collection: object providing ``columns``, ``columnGroups`` and ``find``.
        contentColNums: dotted keys whose values make up ``sliceContents``.
        timeColInds: column indices whose values are dates.
        timeColNames, timeColNameInds, timeColNameDivisions,
            timeColNamePhrases: parallel lists describing columns whose
            *names* are dates (parsed value, column index, divisions, phrase).
        overallDate, overallDateFormat, timeFormatter, reverseTimeFormatter:
            date settings; date output is produced only when
            ``overallDateFormat`` is truthy.
        dateDivisions, datePhrases, mindate, maxdate: starting values that
            the slice's own dates are merged into.
        overallLocation: location that every space value is merged with.
        spaceColNames: already-integrated locations from column names.
        spaceColInds: column indices whose values are locations.
        subColInd, slicecount: kept for interface compatibility; unused.
        valueProcessors: mapping from content key to processor, passed to
            ``translate``.

    Returns:
        ``d``, with ``sliceContents``, ``columnNames``, ``dimension`` and,
        where applicable, the date and spatial keys and ``commonLocation`` set.
    """
    # Work on copies: these lists are extended below, and extending the
    # caller's objects in place would carry one slice's values into the next
    # call that receives the same lists.
    dateDivisions = list(dateDivisions or [])
    datePhrases = list(datePhrases or [])
    spaceVals = list(spaceColNames or [])

    # Narrow the overall location by every column-name location; stop as
    # soon as nothing is left in common.
    commonLocation = overallLocation
    for sv in spaceVals:
        commonLocation = loc.intersect(commonLocation,sv)
        if not commonLocation:
            break

    colnames = []
    contents = []

    R = collection.find(query,timeout=False)
    try:
        for r in R:
            pieces = []
            for x in contentColNums:
                path = x.split('.')
                if rhasattr(r,path):
                    pieces.append(translate(valueProcessors.get(x,None),decode_obj(rgetattr(r,path))))
                else:
                    pieces.append('')
            contents.append(' '.join(pieces))

            colnames = uniqify(colnames + list(r.keys()))

            if timeColInds:
                for x in timeColInds:
                    if str(x) in r:
                        time = r[str(x)]
                        if overallDate:
                            time = timeFormatter(overallDate + reverseTimeFormatter(time))
                        dateDivisions += td.getLowest(time)
                        datePhrases.append(td.phrase(time))
                        mindate = td.makemin(mindate,time)
                        maxdate = td.makemax(maxdate,time)
            if spaceColInds:
                for x in spaceColInds:
                    if str(x) in r:
                        location = loc.integrate(overallLocation,r[str(x)])
                        # Intersect with the integrated location, as largeAdd
                        # does, not with the raw stored value.
                        if commonLocation is not None:
                            commonLocation = loc.intersect(commonLocation,location)
                        spaceVals.append(location)
    finally:
        # The cursor is opened with timeout=False, so close it explicitly
        # even when processing a record raises.
        R.close()

    d['sliceContents'] = ' '.join(contents)
    # Record keys that are digit strings are column indices; others (such as
    # '_id') are skipped.
    d['columnNames'] = [collection.columns[int(x)] for x in colnames if x.isdigit()]
    d['dimension'] = len(d['columnNames'])

    #time/date
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat

        if 'timeColNames' in collection.columnGroups:
            K = [k for (k,j) in enumerate(timeColNameInds) if str(j) in colnames]
            # min()/max() raise on an empty list, so skip when none of the
            # date-named columns occurs in this slice.
            if K:
                dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
                mindate = td.makemin(mindate,min([timeColNames[k] for k in K]))
                maxdate = td.makemax(maxdate,max([timeColNames[k] for k in K]))
                datePhrases += uniqify([timeColNamePhrases[k] for k in K])

        d['beginDate'] = td.convertToDT(mindate)
        d['endDate'] = td.convertToDT(maxdate,convertMode='High')
        d['dateDivisions'] = uniqify(dateDivisions)
        # Phrases are de-duplicated only when the volume reaches 10000.
        d['datePhrases'] = datePhrases if d['volume'] < 10000 else uniqify(datePhrases)

    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion([loc.divisions(sv) for sv in spaceVals]))
        d['spatialDivisionsTight'] = uniqify(ListUnion([loc.divisions2(sv) for sv in spaceVals]))
        d['spatialPhrases'] = uniqify([loc.phrase(sv) for sv in spaceVals])
        d['spatialPhrasesTight'] = uniqify([loc.phrase2(sv) for sv in spaceVals])

    # The common location was computed but never stored; publish it the same
    # way largeAdd does.
    if commonLocation:
        d['commonLocation'] = loc.phrase(commonLocation)

    return d