Esempio n. 1
0
def largeAdd(d,query,collection,contentColNums, timeColInds ,timeColNames , timeColNameInds ,timeColNameDivisions ,timeColNamePhrases ,overallDate , overallDateFormat, timeFormatter ,reverseTimeFormatter ,dateDivisions ,datePhrases ,mindate ,maxdate ,overallLocation , spaceColNames, spaceColInds, subColInd, valueProcessors):

    print '0'
    exists = []
    check = range(len(collection.columns))
    while check:
        k = check.pop(0)
        rec = collection.find_one( dict(query.items() + [(str(k),{'$exists':True})]))
        if rec:
            rec.pop('_id')
            new = map(int,rec.keys()) 
            check = list(set(check).difference(new))
            check.sort()
            exists += [(pos,collection.columns[pos]) for pos in new]
    
    print '1'
    exists = [e for e in exists if e[1] not in SPECIAL_KEYS] 
    (colnums,colnames) = zip(*exists)
    
    d['columnNames'] = colnames
    d['dimension'] = len(d['columnNames'])
       
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat
        
        if timeColInds:
            dateColVals = ListUnion([collection.find(query).distinct(str(t)) for t in timeColInds if t in colnums])
            if overallDate:
                dateColVals = [timeFormatter(overallDate + reverseTimeFormatter(time)) for time in dateColVals]
        
            dateDivisions += uniqify(ListUnion(map(td.getLowest,dateColVals)))
            datePhrases += uniqify(map(td.phrase, dateColVals))
            mindate = td.makemin(mindate,min(dateColVals),)
            maxdate = td.makemax(maxdate,max(dateColVals),)
      
  
        if timeColNameInds:
            K = [k for (k,j) in enumerate(timeColNameInds) if k in colnums]
            dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
            mindate = td.makemin(mindate,min([timeColNames[k] for k in K]),)
            maxdate = td.makemax(maxdate,max([timeColNames[k] for k in K]),)
            datePhrases += [timeColNamePhrases[k] for k in K]

        dateDivisions = uniqify(dateDivisions)
        datePhrases = uniqify(datePhrases)
        
        d['beginDate'] = td.convertToDT(mindate)
        d['endDate'] = td.convertToDT(maxdate,convertMode='High')
        d['dateDivisions'] = uniqify(dateDivisions)
        d['datePhrases'] = datePhrases
    
    print '2'    
    if spaceColInds:
        spaceColVals = ListUnion([collection.find(query).distinct(str(t)) for t in spaceColInds if t in colnums])
        spaceColVals = [loc.integrate(overallLocation,scv) for scv in spaceColVals]   
    else:
        spaceColVals = []
    spaceVals = spaceColNames + spaceColVals 
    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion(map(loc.divisions,spaceVals)))
        d['spatialDivisionsTight'] = uniqify(ListUnion(map(loc.divisions2,spaceVals)))
        d['spatialPhrases'] = uniqify(map(loc.phrase,spaceVals))
        d['spatialPhrasesTight'] = uniqify(map(loc.phrase2,spaceVals))
    commonLocation = overallLocation
    for sv in spaceVals:
        commonLocation = loc.intersect(commonLocation,sv)
        if not commonLocation:
            break 
    if commonLocation:
        d['commonLocation'] = loc.phrase(commonLocation)
                
    print '3'    

    d['sliceContents'] = ' '.join(uniqify(ListUnion([translate_list(valueProcessors.get(x,None) ,map(decode_obj,collection.find(query).distinct(x))) for x in contentColNums])))

    return d
Esempio n. 2
0
def smallAdd(d,query,collection,contentColNums, timeColInds ,timeColNames , timeColNameInds ,timeColNameDivisions ,timeColNamePhrases ,overallDate, overallDateFormat, timeFormatter ,reverseTimeFormatter ,dateDivisions ,datePhrases ,mindate ,maxdate ,overallLocation , spaceColNames , spaceColInds ,subColInd, valueProcessors,slicecount):

    R = collection.find(query,timeout=False)
    colnames = []
    d['sliceContents'] = []
    Subcollections = []
    
    spaceVals = spaceColNames
    commonLocation = overallLocation    
    for sv in spaceColNames:
        commonLocation = loc.intersect(commonLocation,sv)
        if not commonLocation:
            break     
  
    for (i,r) in enumerate(R):
        d['sliceContents'].append(' '.join([translate(valueProcessors.get(x,None),decode_obj(rgetattr(r,x.split('.')))) if rhasattr(r,x.split('.')) else '' for x in contentColNums]))
                      
        colnames  = uniqify(colnames + r.keys())
        
        if subColInd:
            Subcollections += r[str(subColInd)]
                
        if timeColInds:
            for x in timeColInds:
                if str(x) in r.keys():
                    time = r[str(x)]
                    if overallDate:
                        time = timeFormatter(overallDate + reverseTimeFormatter(time))
                    dateDivisions += td.getLowest(time)
                    datePhrases.append(td.phrase(time))     
                    mindate = td.makemin(mindate,time)
                    maxdate = td.makemax(maxdate,time)
        if spaceColInds:
            for x in spaceColInds:
                if str(x) in r.keys():
                    location = loc.integrate(overallLocation,r[str(x)])
                    commonLocation = loc.intersect(commonLocation,r[str(x)]) if commonLocation != None else None
                    spaceVals.append(location)
    
    d['sliceContents'] = ' '.join(d['sliceContents'])
    Subcollections = uniqify(Subcollections)
    d['columnNames'] = [collection.columns[int(x)] for x in colnames if x.isdigit()]
    d['dimension'] = len(d['columnNames'])
    #time/date
        
    if overallDateFormat:
        d['dateFormat'] = overallDateFormat
        
        if 'timeColNames' in collection.columnGroups.keys():
            K = [k for (k,j) in enumerate(timeColNameInds) if str(j) in colnames]
            dateDivisions += uniqify(ListUnion([timeColNameDivisions[k] for k in K]))
            mindate = td.makemin(mindate,min([timeColNames[k] for k in K]))
            maxdate = td.makemax(maxdate,max([timeColNames[k] for k in K]))         
            datePhrases += uniqify([timeColNamePhrases[k] for k in K])
        
        d['beginDate'] = td.convertToDT(mindate)
        d['endDate'] = td.convertToDT(maxdate,convertMode='High')
        d['dateDivisions'] = uniqify(dateDivisions)
        d['datePhrases'] = datePhrases if d['volume'] < 10000 else uniqify(datePhrases)

    if spaceVals:
        d['spatialDivisions'] = uniqify(ListUnion(map(loc.divisions,spaceVals)))
        d['spatialDivisionsTight'] = uniqify(ListUnion(map(loc.divisions2,spaceVals)))
        d['spatialPhrases'] = uniqify(map(loc.phrase,spaceVals))
        d['spatialPhrasesTight'] = uniqify(map(loc.phrase2,spaceVals))
        
    return d