def run(self):
    """Join article features with external data and write JSON lines.

    Reads the article features (required fields ``x``, ``y`` and
    ``externalId``) and the features stored at ``self.inPath``. For every
    article whose external id is present in the latter, a record holding
    the article id plus all external fields is built. Values that parse
    as floats are stored as floats; any field that failed to parse at
    least once is converted with ``str`` in every record. Each record is
    written to ``self.outPath`` as one JSON object per line.
    """
    conf = Config.get()
    articles = read_features(
        conf.get('GeneratedFiles', 'article_coordinates'),
        conf.get('ExternalFiles', 'external_ids'),
        required=('x', 'y', 'externalId'))
    external = read_features(self.inPath)

    rows = []
    textColumns = set()
    for index, (articleId, article) in enumerate(articles.items()):
        if index % 100000 == 0:
            logger.info('insert point %d of %d' % (index, len(articles)))
        externalId = article['externalId']
        if externalId in external:
            row = {'id': articleId}
            for column, raw in external[externalId].items():
                try:
                    row[column] = float(raw)
                except ValueError:
                    # Not numeric: remember the column and keep the raw value.
                    textColumns.add(column)
                    row[column] = raw
            rows.append(row)

    # A column that was non-numeric anywhere is stringified everywhere.
    for row in rows:
        for column in textColumns:
            row[column] = str(row[column])

    with open(self.outPath, "w") as out:
        for row in rows:
            json.dump(row, out)
            out.write('\n')
def run(self):
    """Create a FreeText from the w2v path and an id mapping, then rebuild it.

    The mapping goes from each feature's ``externalId`` to the key under
    which that feature is stored in the external-ids file.
    """
    settings = Config.get()
    vectorsPath = settings.get('ExternalFiles', 'w2v')
    features = read_features(settings.get('ExternalFiles', 'external_ids'))
    # When two features share an external id, the later one wins.
    externalToInternal = dict(
        (info['externalId'], internalId)
        for internalId, info in features.items())
    FreeText(vectorsPath, externalToInternal).rebuildIfNecessary()
def run_server(path_cartograph_cfg, path_tilestache_cfg):
    """Initialise the configuration, clear the tile cache and serve the app.

    Args:
        path_cartograph_cfg: path handed to ``Config.initConf``.
        path_tilestache_cfg: path of a JSON file whose ``cache.path`` entry
            names the cache directory to delete; its absolute form is also
            passed to ``CartographServer``.

    Raises:
        AssertionError: if the cache directory exists but its path is five
            characters or shorter; nothing is deleted in that case.
    """
    Config.initConf(path_cartograph_cfg)
    path_tilestache_cfg = os.path.abspath(path_tilestache_cfg)

    # Read the config inside ``with`` so the file handle is always closed.
    with open(path_tilestache_cfg, 'r') as cfg_file:
        path_cache = json.load(cfg_file)['cache']['path']

    static_files = {'/static': os.path.abspath('./web')}

    if os.path.isdir(path_cache):
        # Safety guard before a recursive delete (presumably to avoid wiping
        # something like '/'). Raised explicitly rather than via ``assert``
        # so the check is not stripped when Python runs with -O.
        if len(path_cache) <= 5:
            raise AssertionError(
                'refusing to delete suspiciously short cache path %r'
                % (path_cache,))
        shutil.rmtree(path_cache)

    app = CartographServer(path_tilestache_cfg, Config.get())
    run_simple('0.0.0.0', 8080, app, static_files=static_files)
def requires(self):
    """Return one MetricData task per name listed in the 'active' option.

    Each metric's own option in the ``Metrics`` section is parsed as JSON;
    its ``path`` entry becomes the task's input path, and the output path
    is ``<metricDir>/<name>.json``.
    """
    conf = Config.get()
    tasks = []
    outputDir = conf.get('DEFAULT', 'metricDir')
    for metricName in conf.get('Metrics', 'active').split():
        settings = json.loads(conf.get('Metrics', metricName))
        tasks.append(MetricData(
            name=metricName,
            inPath=settings['path'],
            outPath=os.path.join(outputDir, metricName + '.json')))
    return tasks
def __init__(self):
    """Read the PG settings and edge-bundle path from the configuration.

    Logs which file is about to be loaded, runs the parent initialiser and
    prints ``self.table``.
    """
    cfg = Config.get()
    self._host = cfg.get('PG', 'host')
    self._database = cfg.get('PG', 'database')
    # Falsy user/password values (e.g. empty strings) are stored as None.
    self._user = cfg.get('PG', 'user') or None
    self._password = cfg.get('PG', 'password') or None
    self._table = 'edges'
    self._dataFile = cfg.get('GeneratedFiles', 'edge_bundles')
    self.columns = ['bundle', 'weights', 'numPoints', 'endPoints']
    message = 'loading %s.%s from %s' % (
        self._database, self._table, self._dataFile)
    logger.info(message)
    super(LoadCoordinateEdges, self).__init__()
    print(self.table)
def run_server(path_cartograph_cfg, path_tilestache_cfg):
    """Server run function, you probably don't care about this part if all
    you care about is implementing search.

    Initialises the configuration, deletes the tile cache directory named
    in the TileStache config, and serves the app on 0.0.0.0:8080.

    Args:
        path_cartograph_cfg: path handed to ``Config.initConf``.
        path_tilestache_cfg: path of a JSON file whose ``cache.path`` entry
            names the cache directory to delete; its absolute form is also
            passed to ``CartographServer``.

    Raises:
        AssertionError: if the cache directory exists but its path is five
            characters or shorter; nothing is deleted in that case.
    """
    Config.initConf(path_cartograph_cfg)
    path_tilestache_cfg = os.path.abspath(path_tilestache_cfg)

    # Read the config inside ``with`` so the file handle is always closed.
    with open(path_tilestache_cfg, 'r') as cfg_file:
        path_cache = json.load(cfg_file)['cache']['path']

    static_files = {'/static': os.path.abspath('./web')}

    if os.path.isdir(path_cache):
        # Safety guard before a recursive delete (presumably to avoid wiping
        # something like '/'). Raised explicitly rather than via ``assert``
        # so the check is not stripped when Python runs with -O.
        if len(path_cache) <= 5:
            raise AssertionError(
                'refusing to delete suspiciously short cache path %r'
                % (path_cache,))
        shutil.rmtree(path_cache)

    app = CartographServer(path_tilestache_cfg, Config.get())
    run_simple('0.0.0.0', 8080, app, static_files=static_files)
def run_server(path_cartograph_cfg, path_tilestache_cfg):
    """Initialise the configuration, clear the tile cache and serve the app.

    Args:
        path_cartograph_cfg: path handed to ``Config.initConf``.
        path_tilestache_cfg: path of a JSON file whose ``cache.path`` entry
            names the cache directory to delete; its absolute form is also
            passed to ``CartographServer``.

    Raises:
        AssertionError: if the cache directory exists but its path is five
            characters or shorter; nothing is deleted in that case.
    """
    Config.initConf(path_cartograph_cfg)
    path_tilestache_cfg = os.path.abspath(path_tilestache_cfg)

    # Read the config inside ``with`` so the file handle is always closed.
    with open(path_tilestache_cfg, 'r') as cfg_file:
        path_cache = json.load(cfg_file)['cache']['path']

    static_files = {
        '/static': os.path.abspath('./web')
    }

    if os.path.isdir(path_cache):
        # Safety guard before a recursive delete (presumably to avoid wiping
        # something like '/'). Raised explicitly rather than via ``assert``
        # so the check is not stripped when Python runs with -O.
        if len(path_cache) <= 5:
            raise AssertionError(
                'refusing to delete suspiciously short cache path %r'
                % (path_cache,))
        shutil.rmtree(path_cache)

    app = CartographServer(path_tilestache_cfg, Config.get())
    run_simple('0.0.0.0', 8080, app, static_files=static_files)
def run_server(path_cartograph_cfg, path_tilestache_cfg):
    """Server run function, you probably don't care about this part if all
    you care about is implementing search.

    Initialises the configuration, deletes the tile cache directory named
    in the TileStache config, and serves the app on 0.0.0.0:8080.

    Args:
        path_cartograph_cfg: path handed to ``Config.initConf``.
        path_tilestache_cfg: path of a JSON file whose ``cache.path`` entry
            names the cache directory to delete; its absolute form is also
            passed to ``CartographServer``.

    Raises:
        AssertionError: if the cache directory exists but its path is five
            characters or shorter; nothing is deleted in that case.
    """
    Config.initConf(path_cartograph_cfg)
    path_tilestache_cfg = os.path.abspath(path_tilestache_cfg)

    # Read the config inside ``with`` so the file handle is always closed.
    with open(path_tilestache_cfg, 'r') as cfg_file:
        path_cache = json.load(cfg_file)['cache']['path']

    static_files = {
        '/static': os.path.abspath('./web')
    }

    if os.path.isdir(path_cache):
        # Safety guard before a recursive delete (presumably to avoid wiping
        # something like '/'). Raised explicitly rather than via ``assert``
        # so the check is not stripped when Python runs with -O.
        if len(path_cache) <= 5:
            raise AssertionError(
                'refusing to delete suspiciously short cache path %r'
                % (path_cache,))
        shutil.rmtree(path_cache)

    app = CartographServer(path_tilestache_cfg, Config.get())
    run_simple('0.0.0.0', 8080, app, static_files=static_files)
def run(self):
    """Write the endpoint coordinates of every link between known articles.

    Each line of the links file is split on whitespace: the first token is
    the source id and the remaining tokens are destination ids. An edge is
    kept only when both its source and destination appear in the article
    coordinates. The output file starts with the number of kept edges,
    followed by one ``x1 y1 x2 y2`` line per edge.

    Blank lines in the links file are skipped.
    """
    config = Config.get()
    coords = read_features(
        config.get("GeneratedFiles", "article_coordinates"))
    pathIn = config.get("ExternalFiles", "links")
    pathOut = config.get("GeneratedFiles", "edges_with_coords")

    # Both files are closed by the ``with`` block; no explicit close needed.
    with open(pathIn) as fin, open(pathOut, 'w') as fout:
        # First pass: count valid edges and source vertices, because the
        # edge count has to be written before the edges themselves.
        numEdges = 0
        numVertices = 0
        for line in fin:
            vertices = line.split()
            if not vertices or vertices[0] not in coords:
                continue
            numVertices += 1
            numEdges += sum(1 for dest in vertices[1:] if dest in coords)

        logger.info(
            'found %d edges containing %d vertices (out of %d total vertices)'
            % (numEdges, numVertices, len(coords)))

        # Second pass: rewind and emit the coordinates of each valid edge.
        fin.seek(0)
        fout.write(str(numEdges) + '\n')
        for line in fin:
            vertices = line.split()
            if not vertices or vertices[0] not in coords:
                continue
            src = coords[vertices[0]]
            for dest in vertices[1:]:
                if dest in coords:
                    fields = (src['x'], src['y'],
                              coords[dest]['x'], coords[dest]['y'])
                    fout.write(' '.join(fields) + '\n')
def run(self):
    """Write a TSV that pairs each feature id with its popularity.

    The popularity file is tab separated with the name in the first column
    and the popularity in the second. Each feature's ``name`` is looked up
    in that table and the result is written with columns ``id`` and
    ``popularity``.

    Raises:
        KeyError: if a feature's name is absent from the popularity file.
        IndexError: if a popularity line has fewer than two columns.
    """
    config = Config.get()
    featureDict = Utils.read_features(
        config.get("ExternalFiles", "names_with_id"))
    idList = list(featureDict.keys())

    nameDict = {}
    with open(config.get("ExternalFiles", "popularity")) as popularity:
        for line in popularity:
            # Strip the newline before splitting. Slicing the last character
            # off the second column would corrupt the value when the final
            # line has no newline or when a row has extra columns.
            lineAr = line.rstrip('\n').split("\t")
            nameDict[lineAr[0]] = lineAr[1]

    popularityList = [nameDict[featureDict[featureID]["name"]]
                      for featureID in idList]

    Utils.write_tsv(config.get('GeneratedFiles', 'popularity_with_id'),
                    ("id", "popularity"), idList, popularityList)
def output(self):
    """Return the target for the 'popularity_with_id' generated file."""
    path = Config.get().get("GeneratedFiles", "popularity_with_id")
    return TimestampedLocalTarget(path)
def output(self):
    """Return the target for the external 'popularity' file."""
    popularityPath = Config.get().get("ExternalFiles", "popularity")
    return TimestampedLocalTarget(popularityPath)
def run(self):
    """Fetch the configuration, then fail unconditionally with an assertion."""
    Config.get()
    assert False
def output(self):
    """Return the target for the 'edge_bundles' generated file."""
    bundlesPath = Config.get().get("GeneratedFiles", "edge_bundles")
    return TimestampedLocalTarget(bundlesPath)
def output(self):
    """Return one target per path that FreeText.paths gives for the w2v file."""
    vectorsPath = Config.get().get('ExternalFiles', 'w2v')
    targets = []
    for path in FreeText.paths(vectorsPath):
        targets.append(TimestampedLocalTarget(path))
    return targets
def requires(self):
    """Return the tasks this one depends on.

    These are the external input file at ``self.inPath``, the external-ids
    file, ``CreateFullCoordinates`` and ``MetricsCode``.
    """
    settings = Config.get()
    inputFile = ExternalFile(self.inPath)
    externalIdsFile = ExternalFile(settings.get('ExternalFiles', 'external_ids'))
    return (
        inputFile,
        externalIdsFile,
        CreateFullCoordinates(),
        MetricsCode(),
    )
def output(self):
    """Return the target for the 'edges_with_coords' generated file."""
    edgesPath = Config.get().get("GeneratedFiles", "edges_with_coords")
    return TimestampedLocalTarget(edgesPath)
def output(self):
    """Return a luigi.LocalTarget for each DEFAULT option in self.configKeys."""
    settings = Config.get()
    return [luigi.LocalTarget(settings.get('DEFAULT', key))
            for key in self.configKeys]
def output(self):
    """Return the target for the external 'popularity' file."""
    settings = Config.get()
    target = TimestampedLocalTarget(settings.get("ExternalFiles", "popularity"))
    return target
def output(self):
    """Return the target for the external 'region_names' file."""
    regionNamesPath = Config.get().get("ExternalFiles", "region_names")
    return TimestampedLocalTarget(regionNamesPath)
def samplePath(self):
    """Return Config.samplePath for self.path and the configured sample_size."""
    sampleSize = Config.get().getint('PreprocessingConstants', 'sample_size')
    return Config.samplePath(self.path, sampleSize)
def output(self):
    """Return targets for the 'vecs_with_id' and 'names_with_id' files.

    The result is a 2-tuple in that order.
    """
    settings = Config.get()
    vectorsTarget = TimestampedLocalTarget(
        settings.get("ExternalFiles", "vecs_with_id"))
    namesTarget = TimestampedLocalTarget(
        settings.get("ExternalFiles", "names_with_id"))
    return (vectorsTarget, namesTarget)
def output(self):
    """Return a luigi.LocalTarget for each DEFAULT option in self.configKeys."""
    settings = Config.get()
    targets = []
    for key in self.configKeys:
        targets.append(luigi.LocalTarget(settings.get('DEFAULT', key)))
    return targets
def output(self):
    """Return the target for the external 'region_names' file."""
    settings = Config.get()
    target = TimestampedLocalTarget(settings.get("ExternalFiles", "region_names"))
    return target
def run(self):
    """Create each directory named by a DEFAULT option in self.configKeys.

    Directories that already exist are left untouched.
    """
    settings = Config.get()
    for key in self.configKeys:
        directory = settings.get('DEFAULT', key)
        if os.path.isdir(directory):
            continue
        os.makedirs(directory)
def output(self):
    """Return targets for the 'vecs_with_id' and 'names_with_id' files.

    The result is a 2-tuple in that order.
    """
    settings = Config.get()
    vectorsPath = settings.get("ExternalFiles", "vecs_with_id")
    vectorsTarget = TimestampedLocalTarget(vectorsPath)
    namesPath = settings.get("ExternalFiles", "names_with_id")
    namesTarget = TimestampedLocalTarget(namesPath)
    return (vectorsTarget, namesTarget)
# NOTE(review): the statements down to builder.addPoint(...) use names
# (r, g, b, a, p, z, metric, builder) that are bound outside this excerpt;
# they look like the tail of a larger function -- confirm against the full
# file before relying on the layout shown here.

# Multiply x by 255 and convert the result with int().
def f(x): return int(255 * x)
# CSS-style rgba() string: three integer channels and alpha to 3 decimals.
color = 'rgba(%d,%d,%d,%.3f)' % (f(r), f(g), f(b), a)
# Properties attached to the point: its id, its zpop, the colour string and
# the difference between z and the point's zpop.
props = { 'id' : p['id'], 'zpop' : p['zpop'], 'color' : color, 'zoff' : (z - p['zpop']) }
# Copy every metric field from the point, defaulting to 0.0 when absent.
# NOTE: this loop rebinds the name f, shadowing the helper defined above.
for f in metric.fields: props[f] = p.get(f, 0.0)
# Add the point to the 'cities' layer under the point's name.
builder.addPoint('cities', p['name'], shapely.geometry.Point(p['x'], p['y']), props)

# Manual smoke test: load the simple config and print the id and zpop of
# each point returned by getTilePoints(6, 26, 36, 10).
if __name__ == '__main__':
    from cartograph import Config
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    Config.initConf('data/conf/simple.txt')
    pd = PointService(Config.get())
    for p in pd.getTilePoints(6, 26, 36, 10): print p['id'], p['zpop']
    # Disabled debugging snippets, kept as found:
    # for p in pd.getTilePoints(6, 27, 37, 1000):
    #     if p['id'] == '13616':
    #         print p['id']
    # builder = TopoJsonBuilder()
    # pd.addLayers(builder, 'gender', 6, 27, 36)
    # pd.addLayers(builder, 'gender', 6, 27, 37)
    # print builder.toJson()
def output(self):
    """Return the target for the external 'links' file."""
    linksPath = Config.get().get("ExternalFiles", "links")
    return TimestampedLocalTarget(linksPath)