def full_scan():
	"""
	Mark every address found in rent_control.geojson as rent controlled.

	Loads the "features" list from scrapers/data/rent_control.geojson and
	keeps the properties of each feature that has a non-blank address. For
	each distinct address the matching Building is queried (a new Building
	is created when the query returns nothing), its rent_control flag is
	set to True, and all touched buildings are saved with one
	db.put_multi() call. set_log() is called with
	logs/txt/rent_control.txt up front and close_log() at the end.
	"""
	set_log(os.path.join("logs", "txt", "rent_control.txt"))
	log("Scanning rent_control.geojson", important=True)
	geopath = os.path.join("scrapers", "data", "rent_control.geojson")
	features = read(geopath, isjson=True)["features"]
	props = []
	for feature in features:
		if feature["properties"]["address"]:
			props.append(feature["properties"])
	log("using %s of %s rows (omitting blank entries)"%(len(props), len(features)), 1)

	seen = set()
	buildings = []
	for prop in props:
		address = prop["address"]
		if address in seen:
			continue # each address is processed only once
		seen.add(address)
		building = Building.query(Building.address == address).get()
		if not building:
			log("Can't find '%s' -- creating new entry"%(address,), 2)
			building = Building(address=address)
		building.rent_control = True
		buildings.append(building)

	log("saving %s rent-control buildings to db"%(len(buildings),), 1)
	db.put_multi(buildings)
	log("goodbye")
	close_log()
예제 #2
0
def propertyValues():
	"""
	Gather and summarize property values per zip code for 2008 through 2013.

	For each year the "<year>Secured" sheet of the matching
	SanFranciscoPropertyAssessmentRoll workbook is read row by row
	(skipping row 0). Column index 7 is taken as the property value and the
	first five characters of column index 8 as the zip code. Values are
	appended both to the zip code's entry in obj[year] (created on demand)
	and to obj[year]["total"]. Afterwards every entry that collected values
	gets one result per (conversion, func) pair in ARR_ANYL, and its raw
	"all" list is deleted.
	"""
	log("Analyzing Property Values", important=True)
	for year in range(2008, 2014):
		yearstats = obj[year]
		totals = yearstats["total"]
		totals["property_values"] = { "all": [] }
		log("parsing: %s"%(year,), 1)
		xlspath = path("scrapers", "data", "pvals",
			"SanFranciscoPropertyAssessmentRoll%s.xlsx"%(year,))
		sheet = getxls(read(xlspath), "%sSecured"%(year,), parse=False)
		for rownum in range(1, sheet.nrows):
			cells = sheet.row(rownum)
			value = cells[7].value
			zipcode = cells[8].value[:5]
			# create the zip code entry (and its value list) on first sight
			zipstats = yearstats.setdefault(zipcode,
				{ "fires": 0, "people": 0, "units": 0 })
			zipvals = zipstats.setdefault("property_values", { "all": [] })
			zipvals["all"].append(value)
			totals["property_values"]["all"].append(value)
		for zcode, zipstats in yearstats.items():
			if "property_values" not in zipstats:
				log("%s: missing!"%(zcode,), 2)
				continue
			pvals = zipstats["property_values"]
			values = pvals["all"]
			average = None
			log("analyzing: %s"%(zcode,), 2)
			for conversion, func in ARR_ANYL:
				pvals[conversion] = func(values, average)
				average = average or pvals[conversion] # average happens first
				log("%s: %s"%(conversion, pvals[conversion]), 3)
			del pvals["all"]
def processjs(path, jspaths, inits):
    """
    Read the file at path and resolve its CT.require() lines.

    Every line that starts with "CT.require(" and does not end with
    ", true);" is handed to require(), whose return value replaces the
    working block. The path is then appended to jspaths and the block is
    returned with ";\n" appended.
    """
    block = read(path)
    # the loop walks the lines of the text as first read; block itself may
    # be replaced by require() along the way
    for line in block.split("\n"):
        if not line.startswith("CT.require("):
            continue
        if line.endswith(", true);"):
            continue
        block = require(line, jspaths, block, inits)
    jspaths.append(path)
    return "%s;\n" % (block, )
def scan(etype):
	"""
	Convert the rows of scrapers/data/<etype>_1997_2015.xlsx.

	The first row returned by getxls() is skipped; every remaining row is
	passed to the SCANROW[etype] callable. Returns the list of results.
	"""
	fname = "%s_1997_2015.xlsx"%(etype,)
	log("Scanning %s"%(fname,), important=True)
	rows = getxls(read(os.path.join("scrapers", "data", fname)))
	# SCANROW is looked up per row, so an empty sheet never touches it
	return [SCANROW[etype](row) for row in rows[1:]]
예제 #5
0
	def acquire(self, url, path):
		"""
		Return the content for url, using a file under path as a cache.

		The file name is the last "/"-separated piece of url. When that
		file already exists under path it is read and returned; otherwise
		the url is fetched, the data is written to that file, and the data
		is returned.
		"""
		target = os.path.join(path, url.rsplit("/", 1)[-1])
		if not os.path.exists(target):
			log("acquiring: %s"%(url,))
			fetched = fetch(url)
			write(fetched, target)
			return fetched
		return read(target)
예제 #6
0
 def parse(self):
     """
     Build self.timers from the url entries in cron.yaml.

     A line starting with "  url: " supplies a url; the next line after
     it (from its 13th character on, stripped) is passed with that url
     to Rule(), and the result is stored in self.timers under the url.
     """
     self.logger.info("parse")
     pending = None
     for line in read("cron.yaml", True):
         if line.startswith("  url: "):
             pending = line[7:].strip()
             continue
         if not pending:
             continue
         # presumably the "  schedule: " line that follows a url -- confirm
         rule = Rule(self.controller, pending, line[12:].strip())
         self.timers[pending] = rule
         pending = None
예제 #7
0
파일: util.py 프로젝트: bubbleboy14/ctuser
def getWPmails():
    """
    Return the user_email of every row in the wp_users table.

    Connection settings come from the ".c" file, which must hold four
    "|"-separated fields: host, user, password and database name. The
    cursor and the connection are closed before returning, including
    when the query raises.

    Returns a list with the first column of each fetched row.
    """
    import MySQLdb
    h, u, p, d = read(".c").strip().split("|")
    log("extracting email list from WP", 1)
    conn = MySQLdb.connect(host=h, user=u, passwd=p, db=d)
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT user_email FROM wp_users")
            rowz = cur.fetchall()
        finally:
            cur.close()
    finally:
        # always release the connection; it used to be left open
        conn.close()
    log("found %s recipients"%(len(rowz),), 1)
    return [r[0] for r in rowz]
예제 #8
0
 def parse(self):
     """
     Populate self.timers with one Rule per url found in cron.yaml.

     Each "  url: " line sets the current url. The following line is
     sliced from index 12, stripped, and passed to Rule() together with
     self.controller and the url; the url is then cleared.
     """
     self.logger.info("parse")
     current = None
     for line in read("cron.yaml", True):
         if line.startswith("  url: "):
             current = line[7:].strip()
         elif current:
             # presumably a "  schedule: " line -- confirm against cron.yaml
             spec = line[12:].strip()
             self.timers[current] = Rule(self.controller, current, spec)
             current = None
예제 #9
0
def setmode(mode):
    """
    Switch the project to the given mode.

    Calls doyaml(mode), then rewrites the file at config.py.path: the
    encode setting (config.py.enc template) is set to whether mode is
    "production", and any other mode recorded via the config.py.mode
    template is replaced with the requested one.
    """
    doyaml(mode) # support other backends beyond app engine
    source = read(config.py.path)
    production = mode == "production"
    # set encode
    old_enc = config.py.enc%(str(not production),)
    new_enc = config.py.enc%(str(production),)
    source = source.replace(old_enc, new_enc)
    # set mode
    for other in ["dynamic", "static", "production"]:
        if other == mode:
            continue
        source = source.replace(config.py.mode%(other,), config.py.mode%(mode,))
    write(source, config.py.path)
예제 #10
0
def setmode(mode):
    """
    Apply mode to the yaml config and to the file at config.py.path.

    After doyaml(mode), the text of config.py.path gets its encode entry
    (config.py.enc template) flipped to str(mode == "production") and
    every entry for one of the other two modes (config.py.mode template)
    replaced by the entry for mode. The result is written back.
    """
    doyaml(mode)  # support other backends beyond app engine
    text = read(config.py.path)
    prod = mode == "production"
    # set encode
    text = text.replace(
        config.py.enc % (str(not prod), ),
        config.py.enc % (str(prod), ),
    )
    # set mode
    for candidate in ["dynamic", "static", "production"]:
        if candidate == mode:
            continue
        stale = config.py.mode % (candidate, )
        text = text.replace(stale, config.py.mode % (mode, ))
    write(text, config.py.path)
예제 #11
0
파일: util.py 프로젝트: bubbleboy14/ctswarm
def snap():
    """
    Copy data.db into the archive directory unless a match is already there.

    Creates the "archive" directory when missing, reads data.db in binary
    mode and passes it to indir(); a truthy result is logged as the
    matching archive and returned. Otherwise data.db is copied (via a
    "cp" command) to archive/<timestamp>, where the timestamp is
    str(datetime.now()) without its seconds part and with the space
    replaced by "_". Returns the archive name.
    """
    log("attempting snap", important=True)
    if not os.path.exists("archive"):
        log("creating archive directory")
        os.mkdir("archive")
    existing = indir(read("data.db", binary=True), "archive")
    if existing:
        log("matching archive: %s - aborting snap" % (existing, ))
        return existing
    stamp = str(datetime.now())
    # drop everything after the last ":" (the seconds) and avoid spaces
    aname = stamp.rsplit(":", 1)[0].replace(" ", "_")
    target = os.path.join("archive", aname)
    cmd('cp data.db "%s"' % (target, ))
    return aname
예제 #12
0
def allUnits():
	"""
	Count rows per address in scrapers/data/BlockLot_with_LatLon.csv.

	The address is built from columns 22, 20 and 19 (in that order, space
	separated); rows whose address is blank are ignored.

	Returns a tuple (counts, winner, total): the address -> row count
	dict, the first address to reach the highest count (None when no row
	has an address), and the number of rows in the csv.
	"""
	rows = getcsv_from_data(read(path("scrapers", "data", "BlockLot_with_LatLon.csv")))
	counts = {}
	leader = None
	for row in rows:
		address = "%s %s %s"%(row[22], row[20], row[19])
		if address.strip():
			counts[address] = counts.get(address, 0) + 1
			# strict ">" keeps the earlier leader on ties
			if not leader or counts[address] > counts[leader]:
				leader = address
	return counts, leader, len(rows)
예제 #13
0
def doyaml(mode):
    """
    Rewrite the file at config.yaml.path so only the given mode is active.

    The file is read as a list of lines and written back in place. A line
    starting with config.yaml.start opens a section named by the rest of
    that line; a line starting with config.yaml.end closes it. Inside a
    section named mode, "#" characters are stripped from the ends of each
    line; inside any other section each line is rewritten with exactly
    one leading "#". Marker lines and lines outside sections are written
    unchanged.
    """
    log("switching to %s mode" % (mode, ))
    lines = read(config.yaml.path, lines=True)
    m = None
    # "with" closes (and flushes) the file even if a write fails midway
    with open(config.yaml.path, 'w') as f:
        for line in lines:
            if line.startswith(config.yaml.start):
                m = line[len(config.yaml.start):].strip()
            elif line.startswith(config.yaml.end):
                m = None
            elif m == mode:
                line = line.strip("#")
            elif m:
                line = "#%s" % (line.strip("#"), )
            f.write(line)
예제 #14
0
def doyaml(mode):
    """
    Activate the section for mode in the file at config.yaml.path.

    Lines are read up front and the file is rewritten in place. Sections
    begin at a line starting with config.yaml.start (the rest of the line,
    stripped, is the section name) and end at a line starting with
    config.yaml.end. Lines in the section whose name equals mode have
    "#" stripped from both ends; lines in every other section are written
    with a single leading "#". All remaining lines pass through unchanged.
    """
    log("switching to %s mode"%(mode,))
    lines = read(config.yaml.path, lines=True)
    m = None
    # the context manager guarantees the handle is closed on any error
    with open(config.yaml.path, 'w') as f:
        for line in lines:
            if line.startswith(config.yaml.start):
                m = line[len(config.yaml.start):].strip()
            elif line.startswith(config.yaml.end):
                m = None
            elif m == mode:
                line = line.strip("#")
            elif m:
                line = "#%s"%(line.strip("#"),)
            f.write(line)
예제 #15
0
def pretex(doc, fname, fontonly=False):
    """
    Write build/<fname>.tex and return its path.

    With fontonly set, the file contains only the font description
    (FONTFAM filled with config.ctman.font.family, or an empty string
    when no family is configured). Otherwise tex/pre.tex is read and its
    placeholders are filled in:

    _CLIENT_LOGO_       symage(doc.logo.path), or "img/logo.jpg"
    _DECLARATION_PAGE_  dex(doc) when doc.declaration_page is set, else ""
    _SIGNUP_SHEET_      SUSHEET when doc.signup_sheet is set, else ""
    _DOC_NAME_          doc.name
    _DOC_REVISION_      str(doc.revision)
    _DOC_FONT_          the font description
    """
    fcfg = config.ctman.font
    pname = os.path.join("build", "%s.tex" % (fname, ))
    fontdesc = FONTFAM % (fcfg.family, fcfg.family) if fcfg.family else ""
    if fontonly:
        # The old "fontonly and fontdesc or <template>" expression fell
        # through to the full template whenever fontdesc was empty, and
        # then hit an unbound logo name if doc.logo was set.
        write(fontdesc, pname)
        return pname
    logo = (symage(doc.logo.path) if doc.logo else None) or "img/logo.jpg"
    declaration = (dex(doc) or "") if doc.declaration_page else ""
    signup = (SUSHEET or "") if doc.signup_sheet else ""
    tex = read("tex/pre.tex")
    tex = tex.replace("_CLIENT_LOGO_", logo)
    tex = tex.replace("_DECLARATION_PAGE_", declaration)
    tex = tex.replace("_SIGNUP_SHEET_", signup)
    tex = tex.replace("_DOC_NAME_", doc.name)
    tex = tex.replace("_DOC_REVISION_", str(doc.revision))
    tex = tex.replace("_DOC_FONT_", fontdesc)
    write(tex, pname)
    return pname
예제 #16
0
def build(nothing, dirname, fnames):
    """
    This parses an html file, squishes together the javascript, scans
    through for dynamic imports (CT.require statements), injects modules
    wherever necessary, and sticks the result in a big <script> tag.

    Called once per directory: every file returned by bfiles(dirname,
    fnames) is built into each directory in config.build.compiled_dirs
    (keyed by mode). Files under a "fonts" path and files not ending in
    ".html" are copied as read. For html files with javascript, "static"
    mode emits one <script> tag per compiled path and "production" mode
    compresses the markup and inlines the minified script; any other mode
    is reported through error(). Directories containing ".svn" are
    skipped. The first argument is unused.
    """
    if ".svn" in dirname:
        return
    for bd in config.build.compiled_dirs.values():
        checkdir(bd)
    for fname in bfiles(dirname, fnames):
        for mode, compdir in config.build.compiled_dirs.items():
            fulldir = dirname.replace(config.build.dynamic_dir, compdir)
            frompath = os.path.join(dirname, fname)
            topath = os.path.join(fulldir, fname)
            data = read(frompath)
            log('building: %s -> %s' % (frompath, topath), important=True)
            checkdir(fulldir)
            if "fonts" in dirname or not fname.endswith(".html"):
                log('copying non-html file', 1)
            else:
                txt, js = processhtml(data)
                if js:
                    jspaths, jsblock = compilejs(js)
                    # compare by value: "is" on strings only works when
                    # both happen to be the same interned object
                    if mode == "static":
                        log("static mode", 1)
                        js = '\n'.join([
                            '<script src="%s"></script>' % (p, )
                            if p.endswith("js")
                            else '<script>%s</script>' % (p, )
                            for p in jspaths
                        ])
                    elif mode == "production":
                        log("production mode", 1)
                        txt = compress(txt)
                        from slimit import minify
                        # flip the debug switches before minifying
                        prodjs = jsblock.replace(
                            '"_encode": false,', '"_encode": true,').replace(
                                "CT.log._silent = false;",
                                "CT.log._silent = true;")
                        js = "<script>%s</script>" % (
                            minify(prodjs, mangle=True), )
                    else:
                        error("invalid mode: %s" % (mode, ))
                    data = txt.format(jsspot=js)
                else:
                    data = txt
            write(data, topath)
def getRentControl():
	"""
	Return rcset, filling it from rent_control.geojson on first use.

	When rcset is empty, the "features" of
	scrapers/data/rent_control.geojson are read, every zmap entry gets its
	"rent_control" count reset to 0, and for each feature with a non-blank
	address the address is added to rcset while the counts for its zipcode
	and for "SF" are incremented. Zip codes missing from zmap are added
	with "ad_hoc": True.
	"""
	if list(rcset):
		return rcset # already populated
	import os
	from cantools.util import read, log
	log("Parsing Rent Control Data", important=True)
	geopath = os.path.join("scrapers", "data", "rent_control.geojson")
	features = read(geopath, isjson=True)["features"]
	for zipcode in zmap:
		zmap[zipcode]["rent_control"] = 0
	props = [f["properties"] for f in features if f["properties"]["address"]]
	log("using %s of %s rows (omitting blank entries)"%(len(props), len(features)), 1)
	for prop in props:
		zipcode = prop["zipcode"]
		rcset.add(prop["address"])
		if zipcode not in zmap:
			zmap[zipcode] = {
				"rent_control": 0,
				"ad_hoc": True
			}
		zmap[zipcode]["rent_control"] += 1
		zmap["SF"]["rent_control"] += 1
	return rcset
예제 #18
0
	def __init__(self):
		"""
		Load the cache from "<zcpath>.json" (an empty dict when read()
		falls back to its default) and reset every entry's "count" to 0.
		"""
		self.cache = read("%s.json"%(zcpath,), isjson=True, default={})
		for entry in self.cache.values():
			entry["count"] = 0
예제 #19
0
def part(fname):
    """
    Return the content of templates/<fname> under a "# <name>" heading,
    where <name> is fname up to its first ".".
    """
    title = fname.split(".")[0]
    body = read(os.path.join("templates", fname))
    return "# %s\n%s" % (title, body)
예제 #20
0
 def __init__(self):
     """
     Read the cache from "<zcpath>.json" (defaulting to an empty dict)
     and set the "count" of every cached entry to 0.
     """
     cache_file = "%s.json" % (zcpath, )
     self.cache = read(cache_file, isjson=True, default={})
     for entry in self.cache.values():
         entry["count"] = 0
예제 #21
0
	38: [["\n\n 1\n\n 5\n\n", "\n\n"]],
	43: [["\n\n 1\n\n 1\n\n", "\n\n"]],
	45: [["\n\n 1\n\n", "\n\n"]],
	47: [["\n\n 5\n\n 1\n\n", "\n\n"]],
	51: [["\n\n 1\n\n", "\n\n"]],
	23: [["\nB02\nB10\nB10\nB09\nB06\nB06\nB03\nB06\nB01\nB09\nB06\nB08\nB06\nB10\nB06\nB01\nB01\nB10\nB04\nB03\nB04\nB06\nB02\nB10\nB06\nB08\nB04\nB07\nB02\nB10\nB09\nB08\nB10\nB02\nB04\nB06\nB04\nB05\nB10\nB02\nB09\n\n94124\n94124\n94112\n94114\n94110\n94130\n94110\n94133\n94112\n94110\n94122\n94131\n\n94114\n94111\n94102\n94124\n94123\n94104\n94109\n94110\n94114\n94107\n\n94116\n94115\n94121\n94110\n94124\n94112\n94122\n94124\n94103\n94115\n94131\n94109\n94117\n94124\n94102\n94112\n", "\n94124\n94124\n94112\n94114\n94110\n94130\n94110\n94133\n94112\n94110\n94122\n94131\n\n94114\n94111\n94102\n94124\n94123\n94104\n94109\n94110\n94114\n94107\n\n94116\n94115\n94121\n94110\n94124\n94112\n94122\n94124\n94103\n94115\n94131\n94109\n94117\n94124\n94102\n94112\n\nB02\nB10\nB10\nB09\nB06\nB06\nB03\nB06\nB01\nB09\nB06\nB08\nB06\nB10\nB06\nB01\nB01\nB10\nB04\nB03\nB04\nB06\nB02\nB10\nB06\nB08\nB04\nB07\nB02\nB10\nB09\nB08\nB10\nB02\nB04\nB06\nB04\nB05\nB10\nB02\nB09\n"]],
	32: [['\n1267 PAGE ST\n333 MONTICELLO ST\n1568 HAIGHT ST\n\n94117\n94132\n94117\n', '\n\n1267 PAGE ST\n94117\n333 MONTICELLO ST\n94132\n1568 HAIGHT ST\n94117\n'], ["\n\n 2\n\n 2\n\n 1\n\n", "\n\n"]]
}

STEST = False
# happens automatically based on ct.cfg unless scraper is run independent of web server
config.geo.test = STEST
# base directory used by path(): ".." when STEST is set, "." otherwise
pbase = ".." if STEST else "."
def path(*components):
	"""Join the given components onto pbase using os.path.join()."""
	parts = (pbase,) + components
	return os.path.join(*parts)

# full text of scrapers/data/fires.txt
data = read(path("scrapers", "data", "fires.txt"))
# split on form feeds ("\x0c"); keep only the chunks containing PBRK, and
# from each keep the piece that follows the first PBRK
pages = [p.split(PBRK)[1] for p in data.split("\x0c") if PBRK in p]

def getDates(page):
	"""
	Split the leading date lines off a page and parse them.

	Everything before the first blank line ("\n\n") is treated as date
	lines. For each of those lines containing "/", the last
	space-separated token is read as month/day/year and turned into a
	datetime. Returns (dates, rest), where rest is the page text after
	the first blank line.
	"""
	header, page = page.split("\n\n", 1)
	dates = []
	for entry in header.split("\n"):
		if "/" not in entry:
			continue
		nums = [int(piece) for piece in entry.rsplit(" ", 1)[-1].split("/")]
		# tokens are month/day/year; datetime wants year, month, day
		dates.append(datetime(nums[2], nums[0], nums[1]))
	log("got %s dates"%(len(dates),), 1)
	return dates, page

def a2z(addr):
	"""Pass addr, suffixed with ", san francisco, CA", to addr2zip() and return its result."""
	query = "%s, san francisco, CA"%(addr,)
	return addr2zip(query)