Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-s, --source:\tsource default backpage
'''

import sys
import getopt
import watdb
from watdb import Watdb
import re
from dig.pymod.util import asStream
import dig.pymod.util

# Module-level logger; an info message is emitted as a side effect of import.
from watlog import watlog
logger = watlog("wat.bphone")
logger.info('wat.bphone initialized')

## todo
## consider /nfs/studio-data/wat/data/escort/20130124/neworleans.backpage.com/FemaleEscorts/sweet-southern-beautyariel-21/7544109
## apparently extracts part of URL as phone number
## we are supposed to be looking only at the text proper
## is shedhml doing its job?

# Module version string.
VERSION = '0.4'
# NOTE(review): looks like a version-control keyword-expansion string -- confirm.
REVISION = "$Revision: 23000 $"

# defaults
VERBOSE = True
# Starts out empty; presumably filled in elsewhere in this module (not visible here).
AREA_CODES = dict()
\t--revision:\t = as used in crawl/extract, defaults to watmeta revision
\t--schema:\t = schema of watmeta, may not match crawl/extract
\t--prop:\t = major aspect of property to record
\t--facet:\t = opt minor aspect of property to record
\t--val:\t = val to record (limit 63 char)
'''

import sys
import getopt
import watdb
from watdb import Watdb
from collections import defaultdict
import util

# Module-level logger; an info message is emitted as a side effect of import.
from watlog import watlog
logger = watlog("wat.watmeta")
logger.info('wat.watmeta initialized')

# Switch off web.py's debug flag right after importing it.
import web
web.config.debug = False
import socket

# Module version string (lower-case name, as in the original).
version = '0.8'

# defaults
VERBOSE = False
# NOTE(review): the constants below presumably back the command-line options
# described in the usage text above -- confirm against the option parser.
TABLE = 'watmeta'
APPLICATION = 'escort'
TASK = 'extract'
SCOPE = 'wat'
@author: Andrew Philpot
@version 0.13

trbot/wat imghash module
file-level hashing of image. Not concerned with maintaining database pointers
Usage: python imghash.py
Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-r, --repo:\trepository root
'''

# Logger is created first; an info message is emitted as a side effect of import.
import logging
from watlog import watlog
logger = watlog("wat.imghash")
logger.info('wat.imghash initialized')

import sys
import getopt
import os
import shutil
import errno
# import time
# import datetime
# NOTE(review): top-level "Image" is presumably the classic PIL module -- confirm.
import Image
import util
from util import safeHex

# Module version string; matches the @version tag in the docstring above.
VERSION = '0.13'
@author: Andrew Philpot
@version 0.5

trbot/wat imghashdir module
directory-level hashing of images
suitable to be called at YYYYMMDD dir root
Usage: python imghashdir.py <dir>
Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-r, --repo:\trepository root
'''

# Logger is created first; an info message is emitted as a side effect of import.
import logging
from watlog import watlog
logger = watlog("wat.imghash.imghashdir")
logger.info('wat.imghash.imghashdir initialized')

import sys
import getopt
import os
import time
# NOTE(review): top-level "Image" is presumably the classic PIL module -- confirm.
import Image
# Hasher class, its exception types and the REPO constant come from the
# sibling imghash module.
from imghash import Imghash, Error, InputError, MissingInputFile, CorruptInputFile, ProcessingError, FailedHashAttemptError, IntegrityError, SizeMismatchError, REPO
import util

# Module version string; matches the @version tag in the docstring above.
VERSION = '0.5'

# defaults
\t-t, --tier:\tsee wataux.markettiers, integer 1-99, no default \t-r, --region:\4-digit region code or 5-char region desig, see wataux.marketregions, no default ''' import sys import getopt # import trbotdb import watdb import util import re import web web.config.debug = False # import logging from watlog import watlog logger = watlog("wat.boutique") logger.info('wat.boutique initialized') VERSION = '0.6' REVISION = "$Revision: 22999 $" # defaults VERBOSE = True SOURCE = 'backpage' APPLICATION = 'escort' # MARKET = 'LAX' MARKET = None CODE = MARKET CITY = None SITEKEY = None
# _orig_interpolate=web.db._interpolate # needed? def _interpolate_ignore_dollar_sign(format): # print "enter _interpolate_ignore_dollar_sign" return [(0, format)] web.db._interpolate = _interpolate_ignore_dollar_sign ## end v 0.10 from web.db import sqlquote from collections import defaultdict from watlog import watlog logger = watlog("wat.watdb") logger.info('wat.watdb initialized') # WE HAVE TWO ENGINES: MySQLdb and webpy # note that MySQLdb is a zipped python egg and needs to be be able to # uncompress into a python-eggs directory. For generality when # running as a web server, I placed a directive in httpd.conf, but one # could also do something like # os.environ['PYTHON_EGG__CACHE'] = '/tmp/python-eggs' import MySQLdb import web web.config.debug = False VERSION = '0.10' REVISION = "$Revision: 21852 $" VERBOSE = True