def loadRawdata(rawfile=None, updbmode=1):
    """
    rawfile: rawdata csv file.
    updbmode: update db mode: 1-all, 2-incr.

    Init *algo* tables with rawdata csv (16 columns) -- SLOW if csv is big,
    try offline.doClusterAll(rawdata) -> db.loadClusteredData() instead.
    1) db.initTables(): init db tables when updating all the db data.
    2) db.updateIndexes(): update table indexes, dropping old idxs when only updating db incrementally.
    3) offline.doClusterIncr(): incremental clustering.
    """
    dbips = DB_OFFLINE
    doflush = True
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        if updbmode == 1:
            # Create WPP tables.
            wppdb.initTables(doDrop=True)
            doflush = False
        # Update indexes.
        wppdb.updateIndexes(doflush)
        # Load csv clustered data into DB tables.
        n_inserts = doClusterIncr(fd_csv=file(rawfile), wppdb=wppdb)
        print 'Added: [%s] clusters, [%s] FPs' % (n_inserts['n_newcids'], n_inserts['n_newfps'])
        # Init ver_uprecs in |wpp_uprecsver| if it's empty.
        if wppdb.getRawdataVersion() is None:
            wppdb.setRawdataVersion('0')
        wppdb.close()
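# A minimal usage sketch for loadRawdata(); the csv path is hypothetical, and
# DB_OFFLINE/dbsvrs are assumed to be configured already (e.g. in wpp.config):
#
#   loadRawdata(rawfile='dat/rawdata_16col.csv', updbmode=1)  # drop & recreate tables, then cluster
#   loadRawdata(rawfile='dat/rawdata_16col.csv', updbmode=2)  # keep tables, refresh indexes, then cluster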
def test_fixPosWLAN():
    """WLAN positioning test case 1-31"""
    # $ for i in $(seq 31); do python wpp/location.py -f $i; done
    # $ egrep -A1 'final|NO cluster' wpp.log |grep -v 'Sele' |grep -v final |grep -v '\-\-' | \
    #   sed 's/^<2011.*$/\[\]/g' |sed 's/\(.*\)/\1,/g'
    poss_ok = [
        [], [], [], [], [], [], [],
        [39.912616, 116.3521475, 50],
        [39.912782, 116.352266, 50],
        [39.91257075, 116.35363975, 122.54288388709156],
        [39.912613571428572, 116.35301342857143, 108.37042404018443],
        [39.91257075, 116.35363975, 122.54288388709156],
        [39.91245, 116.352029, 50],
        [39.91257199999999, 116.35131033333333, 96.876048832124482],
        [39.910843, 116.352233, 50],
        [39.912782, 116.352266, 50],
        [39.912506666666665, 116.34972933333334, 50],
        [39.896571000000002, 116.347176, 100],
        [],
        [],
        [39.894749695652173, 116.34846693478261, 504.49033670393203],
        [39.903174187499999, 116.3043408125, 205.26326958257582],
        [39.911346999999999, 116.367709, 50],
        [39.905437777777777, 116.30197872222223, 71.863904363421156],
        [39.898307000000003, 116.367233, 50],
        [39.896256999999999, 116.345404, 50],
        [39.866599000000001, 116.33084275, 140.93180577820135],
        [39.898285222222221, 116.37795088888889, 50],
        [39.907567142857147, 116.3518077142857, 82.052322921173257],
        [39.906203714285709, 116.31805528571428, 50],
        [39.907556527131781, 116.35137625581396, 50],
    ]
    dbsvr = dbsvrs[DB_ONLINE]
    wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
    for i, pos_ok in enumerate(poss_ok):
        len_visAPs, wifis = getWLAN(i+1)
        pos_test = fixPosWLAN(len_visAPs, wifis, wppdb, True)
        assert_equal(pos_ok, pos_test)
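# Hedged note: assert_equal here is presumably nose.tools.assert_equal, so the
# case table above can be replayed with a nose-style runner (the module path
# below is hypothetical):
#
#   $ nosetests -v wpp/tests.py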
def crawlAreaLocData():
    """
    1) fetch 100 records with flag area_ok = 0.
    2) try areaLocation(laccid); if OK, then update flag area_ok = 1 and quit, else goto 3).
    3) try googleAreaLocation(latlon); if OK, then get geoaddr:[province,city,district],
       else |wpp_uprecsinfo|.area_try += 1 and quit.
    4) search area_code for the found district, insert area location
       (laccid,areacode,areaname_cn) into |wpp_cellarea|, and update flag area_ok = 1.
    """
    fail_history = {}
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        # Select config.CRAWL_LIMIT raw fps which haven't been tried for google area location.
        fps_noarea = wppdb.getCrawlFPs()
        for fp in fps_noarea:
            # Try areaLocation(laccid).
            laccid = '%s-%s' % (fp[8], fp[9])
            if laccid in fail_history:
                continue
            time = fp[2]
            print laccid, time
            if wppdb.areaLocation(laccid):
                # area_ok = 1 & quit.
                wppdb.setUprecsAreaStatus(status=1, time=time)
            else:
                print fp
                # Try google area location.
                geoaddr = googleAreaLocation( latlon=(fp[11], fp[12]) )
                # area_try += 1 & quit.
                wppdb.setUprecAreaTry(area_try=fp[18]+1, time=time)
                if geoaddr:
                    # Insert area location info (laccid~geoaddr) into |wpp_cellarea|.
                    # Till now, area_location: 'laccid,area_code,province>city>district'.
                    area_location = wppdb.addAreaLocation(laccid=laccid, geoaddr=geoaddr)
                    if not area_location:
                        if not laccid in fail_history:
                            fail_history[laccid] = geoaddr
                        print 'Failed to add area location: [%s] for cell[%s]' % \
                                (geoaddr[-1].encode('utf8'), laccid)
                        continue
                    # area_ok = 1 & quit.
                    wppdb.setUprecsAreaStatus(status=1, time=time)
                    print area_location.encode('utf8')  # encode('utf8') for crontab.
                else:
                    if geoaddr is None:
                        sys.exit(0)  # OVER_QUERY_LIMIT.
                    else:
                        pass
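# A hedged sketch of the fp record layout that crawlAreaLocData() indexes; the
# index names are hypothetical, while the authoritative layout is the
# |wpp_uprecsinfo| schema (16 raw cols + ver_uprecs, area_ok, area_try = 19):
FP_TIME, FP_LAC, FP_CID, FP_LAT, FP_LON, FP_AREA_TRY = 2, 8, 9, 11, 12, 18

fp_demo = ['0'] * 19                                # placeholder 19-col record
fp_demo[FP_LAC], fp_demo[FP_CID] = '4120', '50542'  # hypothetical lac/cid
assert '%s-%s' % (fp_demo[FP_LAC], fp_demo[FP_CID]) == '4120-50542'  # laccid key format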
def main():
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:],
            # NO backward compatibility for file handling, so the relevant
            # methods(os,pprint)/parameters(addr_book,XXXPATH)
            # imported from standard or 3rd-party modules can be avoided.
            "f:hv",
            ["fake=", "help", "verbose"])
    except getopt.GetoptError:
        print 'Error: getopt!\n'
        usage(); sys.exit(99)
    # Program terminated when NO argument followed!
    #if not opts: usage(); sys.exit(0)
    # vars init.
    verbose = False; wlanfake = 0
    for o, a in opts:
        if o in ("-f", "--fake"):
            if a.isdigit():
                wlanfake = int(a)
                if wlanfake >= 0:
                    continue
                else:
                    pass
            else:
                pass
            print '\nIllegal fake WLAN scan ID: %s!' % a
            usage(); sys.exit(99)
        elif o in ("-h", "--help"):
            usage(); sys.exit(0)
        elif o in ("-v", "--verbose"):
            verbose = True
        else:
            print 'Parameter NOT supported: %s' % o
            usage(); sys.exit(99)
    # Get WLAN scanning results.
    len_visAPs, wifis = getWLAN(wlanfake)
    # Fix current position.
    dbsvr = dbsvrs[DB_ONLINE]
    wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
    posresult = fixPosWLAN(len_visAPs, wifis, wppdb, verbose)
    if not posresult:
        sys.exit(99)
    wpplog.debug('final posfix/poserr: \n%s' % posresult)
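# Hedged CLI examples for location.py's main() (fake-scan IDs follow the same
# convention as the test loop in test_fixPosWLAN() above; availability of a
# given ID depends on getWLAN()):
#
#   $ python wpp/location.py -f 7 -v   # position fix from fake WLAN scan 7, verbose
#   $ python wpp/location.py           # position fix from a live WLAN scan (wlanfake=0)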
def updateAlgoData():
    """
    Update from raw data into FPs directly used by location.fixPosWLAN() from WppDB(wpp_clusterid, wpp_cfps).
    1) Retrieve latest incremental rawdata(csv) from remote FTP server(hosted by FPP).
    2) Decompress bzip2, import CSV into wpp_uprecsinfo with its ver_uprecs,
       update ver_uprecs in wpp_uprecsver.
    3) Incr clustering inserted rawdata for direct algo use.
    """
    dbips = DB_OFFLINE
    for dbip in dbips:
        dbsvr = dbsvrs[dbip]
        wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
        ver_wpp = wppdb.getRawdataVersion()
        # Sync rawdata into wpp_uprecsinfo from remote FTP server.
        print 'Probing rawdata version > [%s]' % ver_wpp
        vers_fpp, localbzs = syncFtpUprecs(FTPCFG, ver_wpp)
        if not vers_fpp:
            print 'Not found!'; continue
        else:
            print 'Found new vers: %s' % vers_fpp
        # Handle each bzip2 file.
        alerts = {'vers':[], 'details':''}
        tab_rd = 'wpp_uprecsinfo'
        for bzfile in localbzs:
            # Parse the ver_uprecs info out of the name of each bzip2 file.
            ver_bzfile = bzfile.split('_')[-1].split('.')[0]
            # Update ver_uprecs in wpp_uprecsver to ver_bzfile.
            wppdb.setRawdataVersion(ver_bzfile)
            print '%s\nUpdate ver_uprecs -> [%s]' % ('-'*40, ver_bzfile)
            # Decompress bzip2.
            sys.stdout.write('Decompress & append rawdata ... ')
            csvdat = csv.reader( BZ2File(bzfile) )
            try:
                indat = np_array([ line for line in csvdat ])
            except csv.Error, e:
                sys.exit('\n\nERROR: %s, line %d: %s!\n' % (bzfile, csvdat.line_num, e))
            # Append ver_uprecs(auto-incr), area_ok(0), area_try(0) to each raw 16-col fp.
            append_info = np_array([ [ver_bzfile, 0, 0] for i in xrange(len(indat)) ])
            indat_withvers = np_append(indat, append_info, axis=1).tolist(); print 'Done'
            # Import csv into wpp_uprecsinfo.
            try:
                sys.stdout.write('Import rawdata: ')
                wppdb.insertMany(table_name=tab_rd, indat=indat_withvers, verb=True)
            except Exception, e:
                _lineno = sys._getframe().f_lineno
                _file = sys._getframe().f_code.co_filename
                alerts['details'] += '\n[ver:%s][%s:%s]: %s' % \
                        (ver_bzfile, _file, _lineno, str(e).replace('\n', ' '))
                alerts['vers'].append(ver_bzfile)
                print 'ERROR: Insert Rawdata Failed!'
                continue
            # Incr clustering.
            # The file described by fd_csv contains all *location enabled* rawdata from wpp_uprecsinfo.
            strWhere = 'WHERE lat!=0 and lon!=0 and ver_uprecs=%s' % ver_bzfile
            cols_ignored = 3  # 3 status cols ignored during clustering: ver_uprecs,area_ok,area_try.
            cols_select = ','.join(wppdb.tbl_field[tab_rd][:-cols_ignored])
            sql = wppdb.sqls['SQL_SELECT'] % (cols_select, '%s %s' % (tab_rd, strWhere))
            rdata_loc = wppdb.execute(sql=sql, fetch_one=False)
            if not rdata_loc:
                continue  # NO FPs have location info.
            str_rdata_loc = '\n'.join([ ','.join([ str(col) for col in fp ]) for fp in rdata_loc ])
            fd_csv = StringIO(str_rdata_loc)
            print 'FPs for Incr clustering selected & ready'
            n_inserts = doClusterIncr(fd_csv=fd_csv, wppdb=wppdb, verb=False)
            print 'AlgoData added: [%s] clusters, [%s] FPs' % (n_inserts['n_newcids'], n_inserts['n_newfps'])
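# A runnable sketch of the version parsing used above; the filename pattern
# ('<prefix>_<ver>.csv.bz2') is an assumption inferred from the split() calls,
# since the actual naming convention is defined on the FPP/FTP side:
bz_name_demo = 'uprecs_1023.csv.bz2'                  # hypothetical bzip2 file name
ver_demo = bz_name_demo.split('_')[-1].split('.')[0]  # tail after '_', before first '.'
assert ver_demo == '1023'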
def fixPos(posreq=None, has_google=False, mc=None):
    xmlnodes = xmlparser(posreq).getchildren()
    # Parameters default vals init.
    lat, lon, ee = 39.9055, 116.3914, 5000
    errinfo = 'AccuTooBad'; errcode = '102'
    # Logic control switches init.
    pos_area = pos_pt = False
    # WppDB connection init.
    dbsvr = dbsvrs[DB_ONLINE]
    wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
    # lambda func init.
    f = lambda x: [ node.attrib for node in xmlnodes if node.tag == x ]
    plevel = f('PosLevel')
    # Area location related parameters interpretation & default vals init.
    # Default *PosLevel* is Point if not specified.
    plevel = plevel[0]['val'] if plevel else 'Point'
    acode = addr = ''
    if plevel == 'Hybrid':
        pos_area = pos_pt = True
    elif plevel == 'Area':
        pos_area = True
    else:
        pos_pt = True
        plevel = 'Point'  # PosLevel default *Point*.
    if pos_area:
        # Area location.
        cell = f('CellInfo')
        if cell:
            laccid = '%s-%s' % (cell[0]['lac'], cell[0]['cid'])
            acode_addr = wppdb.areaLocation(laccid)
            if acode_addr:
                acode, addr = acode_addr
                errinfo = 'OK'; errcode = '100'
                lat = lon = ee = ''
    if pos_pt:
        # Point location, which returns 3d coordinates.
        macs = f('WLANIdentifier'); rsss = f('WLANMatcher')
        need_google = False
        if macs and rsss:
            macs = macs[0]['val'].split('|')
            rsss = rsss[0]['val'].split('|')
            INTERSET = min(CLUSTERKEYSIZE, len(macs))
            idxs_max = argsort(rsss)[:INTERSET]
            macsrsss = vstack((macs, rsss))[:, idxs_max]
            wlanloc = fixPosWLAN(INTERSET, macsrsss, wppdb, DEBUG_ALGO)
            if not wlanloc:
                need_google = True
        else:
            wlanloc = []
        if not wlanloc:
            if not pos_area:
                cell = f('CellInfo')
            if cell:
                if not pos_area:
                    laccid = '%s-%s' % (cell[0]['lac'], cell[0]['cid'])
                celloc = wppdb.laccidLocation(laccid)
                if not celloc:
                    need_google = True
                    wpplog.error('Cell location FAILED!')
                elif celloc[2] > GOOG_ERR_LIMIT:
                    need_google = False  # googleLocation err too big for wlanloc.
                else:
                    pass
            else:
                celloc = []
        loc = wlanloc or celloc
        if loc:
            lat, lon, ee = loc
            errinfo = 'OK'; errcode = '100'
        # TODO: make googleLocation async job when wlanloc fails & celloc succeeds.
        # Try Google location, when wifi location failed && wifi info exists.
        if need_google and has_google:
            loc_google = googleLocation(macs=macs, rsss=rsss, cellinfo=cell[0], mc=mc)
            if loc_google:
                lat1, lon1, h, ee_goog = loc_google
                if not loc:
                    lat, lon, ee = lat1, lon1, ee_goog
                    errinfo = 'OK'; errcode = '100'
                # Wifi location import. TODO: make google loc import job async when it's *succeeded*.
                if macs and ee_goog <= GOOG_ERR_LIMIT:
                    t = f('Time')
                    t = t[0]['val'] if t else ''
                    fp = '1000, 1000101, %s%s%s, %s, %s, %s, %s' % \
                            (t, ','*9, lat1, lon1, h, '|'.join(macs), '|'.join(rsss))
                    n = doClusterIncr(fd_csv=StringIO(fp), wppdb=wppdb, verb=False)
                    if n['n_newfps'] == 1:
                        wpplog.info('Added 1 WLAN FP from Google')
                    else:
                        wpplog.error('Failed to add FP from Google!')
                # Cell location import.
                if cell and not celloc:
                    if ee_goog <= GOOG_ERR_LIMIT:
                        loc_google[-1] = 500
                        wppdb.addCellLocation(laccid=laccid, loc=loc_google)
                        wpplog.info('Added 1 Cell FP from Google')
            else:
                wpplog.error('Google location FAILED!')
    wppdb.close()
    if plevel == 'Hybrid':
        posresp = POS_RESP_FULL % (errcode, errinfo, lat, lon, ee, plevel, acode, addr)
    elif plevel == 'Area':
        posresp = POS_RESP_AREA % (errcode, errinfo, plevel, acode, addr)
    else:
        posresp = POS_RESP_PT % (errcode, errinfo, lat, lon, ee, plevel)
    return posresp
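# A hedged example of a 'Hybrid' position request for fixPos(). The tag and
# attribute names mirror the f('...') lookups above; the root element name and
# the attribute value formats are assumptions for illustration only:
POSREQ_DEMO = ("<PosReq>"
               "<PosLevel val='Hybrid'/>"
               "<CellInfo lac='4120' cid='50542'/>"
               "<WLANIdentifier val='00:11:22:33:44:55|66:77:88:99:aa:bb'/>"
               "<WLANMatcher val='-55|-70'/>"
               "<Time val='20110622-120000'/>"
               "</PosReq>")
#   posresp = fixPos(posreq=POSREQ_DEMO, has_google=False)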
def main():
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ac:f:hi:k:m:nr:st:uv",
            ["areacrawl", "cluster=", "floor=", "help", "spid=", "kml=", "mode=",
             "no-dump", "rawdata=", "scan", "to-rmp=", "updatedb", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(99)
    if not opts: usage(); sys.exit(0)
    # global vars init.
    crawl_area = False; updatedb = False; doLoadRawdata = False; scan = False
    #spid=0; tormp=False; tfail=0; dokml=False;
    rawfile = None; docluster = False; updbmode = 1
    global verbose, pp, floor, nodump
    verbose = False; pp = None; nodump = False; floor = False
    for o, a in opts:
        if o in ("-a", "--areacrawl"):
            crawl_area = True
        elif o in ("-c", "--cluster"):
            if not a.isdigit():
                print '\ncluster type: %s should be an INTEGER!' % str(a)
                usage(); sys.exit(99)
            else:
                # 1-All; 2-Incr.
                cluster_type = int(a)
                docluster = True
                rmpfile = sys.argv[3]
                if not os.path.isfile(rmpfile):
                    print 'Raw data file NOT exist: %s!' % rmpfile
                    sys.exit(99)
        #elif o in ("-i", "--spid"):
        #    if a.isdigit(): spid = int(a)
        #    else:
        #        print '\nspid: %s should be an INTEGER!' % str(a)
        #        usage(); sys.exit(99)
        elif o in ("-m", "--mode"):
            if a.isdigit():
                updbmode = int(a)
                if not (1 <= updbmode <= 2):
                    print '\nError: updatedb mode: (%d) NOT supported yet!' % updbmode
                    usage(); sys.exit(99)
            else:
                print '\nmode: %s should be an INTEGER!' % str(a)
                usage(); sys.exit(99)
        elif o in ("-r", "--rawdata"):
            if not os.path.isfile(a):
                print 'Rawdata file NOT exist: %s' % a
                sys.exit(99)
            else:
                doLoadRawdata = True
                rawfile = a
        elif o in ("-s", "--scan"):
            scan = True
        #elif o in ("-t", "--to-rmp"):
        #    if not os.path.isfile(a):
        #        print 'Raw data file NOT exist: %s' % a
        #        sys.exit(99)
        #    else:
        #        tormp = True
        #        rawfile = a
        #elif o in ("-k", "--kml"):
        #    if not os.path.isfile(a):
        #        print 'cfprints table file NOT exist: %s' % a
        #        sys.exit(99)
        #    else:
        #        dokml = True
        #        cfpsfile = a
        #elif o in ("-n", "--no-dump"):
        #    nodump = True
        elif o in ("-f", "--floor"):
            if a.isdigit():
                floor = int(a)
            else:
                print '\nfloor: %s should be an INTEGER!\n' % str(a)
                usage(); sys.exit(99)
        elif o in ("-u", "--updatedb"):
            updatedb = True
        elif o in ("-v", "--verbose"):
            verbose = True
            pp = PrettyPrinter(indent=2)
        elif o in ("-h", "--help"):
            usage(); sys.exit(0)
        else:
            print 'Parameter NOT supported: %s' % o
            usage(); sys.exit(99)
    if doLoadRawdata:
        loadRawdata(rawfile, updbmode)
    # Update Algorithm related data.
    if updatedb:
        updateAlgoData()
    if crawl_area:
        crawlAreaLocData()
    # Ordinary fingerprints clustering.
    if docluster:
        if cluster_type == 1:
            doClusterAll(file(rmpfile))
        elif cluster_type == 2:
            dbips = DB_OFFLINE
            for dbip in dbips:
                dbsvr = dbsvrs[dbip]
                wppdb = WppDB(dsn=dbsvr['dsn'], dbtype=dbsvr['dbtype'])
                n_inserts = doClusterIncr(fd_csv=file(rmpfile), wppdb=wppdb)
                print 'Added: [%s] clusters, [%s] FPs' % (n_inserts['n_newcids'], n_inserts['n_newfps'])
                wppdb.close()
        else:
            sys.exit('Unsupported cluster type code: %s!' % cluster_type)
    # KML generation.
    #if dokml:
    #    genKMLfile(cfpsfile)
    ## Raw data to fingerprint conversion.
    #if tormp:
    #    fingerprint = []
    #    fingerprint = genFPs(rawfile)
    #    if not fingerprint:
    #        print 'Error: Fingerprint generation FAILED: %s' % rawfile
    #        sys.exit(99)
    #    if nodump is False:
    #        if not rawfile == None:
    #            date = strftime('%Y-%m%d')
    #            rmpfilename = DATPATH + date + RMPSUFFIX
    #            dumpCSV(rmpfilename, fingerprint)
    #            print '-'*65
    #            sys.exit(0)
    #        else:
    #            usage(); sys.exit(99)
    #    else:
    #        if verbose: pp.pprint(fingerprint)
    #        else: print fingerprint
    #        sys.exit(0)
    # WLAN scan for FP raw data collection.
    if scan:
        collectFPs()
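# Hedged CLI examples for offline.py's main() (file paths are hypothetical;
# note '-c' reads its rmp file from sys.argv[3], as coded above):
#
#   $ python wpp/offline.py -r dat/rawdata.csv -m 1   # loadRawdata(), full db init
#   $ python wpp/offline.py -u                        # updateAlgoData() via FTP sync
#   $ python wpp/offline.py -a                        # crawlAreaLocData()
#   $ python wpp/offline.py -c 2 dat/fps.rmp          # incremental clustering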