Beispiel #1
0
Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-s, --source:\tsource default backpage
'''

import sys
import getopt
import watdb
from watdb import Watdb
import re
from dig.pymod.util import asStream
import dig.pymod.util

from watlog import watlog
logger = watlog("wat.bphone")
logger.info('wat.bphone initialized')

## todo
## consider /nfs/studio-data/wat/data/escort/20130124/neworleans.backpage.com/FemaleEscorts/sweet-southern-beautyariel-21/7544109
## apparently extracts part of URL as phone number
## we are supposed to be looking only at the text proper
## is shedhml doing its job?

# Version string for this module.
VERSION = '0.4'
# Revision marker; the "$Revision: ... $" form looks like a VCS keyword
# expansion -- TODO confirm it is still maintained automatically.
REVISION = "$Revision: 23000 $"

# defaults
# Verbose output is enabled by default (cf. the -v/--verbose option in the
# usage text).
VERBOSE = True

# Starts out empty; presumably populated with area-code data further down
# in the module -- verify against the rest of the file.
AREA_CODES = dict()
Beispiel #2
0
\t--revision:\t = as used in crawl/extract, defaults to watmeta revision
\t--schema:\t = schema of watmeta, may not match crawl/extract

\t--prop:\t = major aspect of property to record
\t--facet:\t = opt minor aspect of property to record
\t--val:\t = val to record (limit 63 char)
'''

import sys
import getopt
import watdb
from watdb import Watdb
from collections import defaultdict
import util
from watlog import watlog
logger = watlog("wat.watmeta")
logger.info('wat.watmeta initialized')
import web
web.config.debug = False
import socket

# Version string for this module.
# NOTE(review): spelled in lowercase, unlike the upper-case constants below.
version = '0.8'

# defaults
# Verbose output is disabled by default.
VERBOSE = False

# Presumably the name of the database table this module writes to -- confirm.
TABLE = 'watmeta'

# Default string values; how they are consumed is not visible in this
# section -- verify against the code that reads them.
APPLICATION = 'escort'
TASK = 'extract'
SCOPE = 'wat'
Beispiel #3
0
@author: Andrew Philpot
@version 0.13

trbot/wat imghash module
file-level hashing of image.  Not concerned with 
maintaining database pointers
Usage: python imghash.py
Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-r, --repo:\trepository root
'''

import logging
from watlog import watlog
logger = watlog("wat.imghash")
logger.info('wat.imghash initialized')

import sys
import getopt
import os
import shutil
import errno
# import time
# import datetime
import Image

import util
from util import safeHex

# Version string for this module; currently equal to the "@version" tag in
# the module docstring.
VERSION = '0.13'
Beispiel #4
0
@author: Andrew Philpot
@version 0.5

trbot/wat imghashdir module
directory-level hashing of images
suitable to be called at YYYYMMDD dir root
Usage: python imghashdir.py <dir>
Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-r, --repo:\trepository root
'''

import logging
from watlog import watlog
logger = watlog("wat.imghash.imghashdir")
logger.info('wat.imghash.imghashdir initialized')

import sys
import getopt
import os
import time
import Image

from imghash import Imghash, Error, InputError, MissingInputFile, CorruptInputFile, ProcessingError, FailedHashAttemptError, IntegrityError, SizeMismatchError, REPO

import util

# Version string for this module; currently equal to the "@version" tag in
# the module docstring.
VERSION = '0.5'

# defaults
Beispiel #5
0
Options:
\t-h, --help:\tprint help to STDOUT and quit
\t-v, --verbose:\tverbose output
\t-s, --source:\tsource default backpage
'''

import sys
import getopt
import watdb
from watdb import Watdb
import re
from dig.pymod.util import asStream
import dig.pymod.util

from watlog import watlog
logger = watlog("wat.bphone")
logger.info('wat.bphone initialized')

## todo
## consider /nfs/studio-data/wat/data/escort/20130124/neworleans.backpage.com/FemaleEscorts/sweet-southern-beautyariel-21/7544109
## apparently extracts part of URL as phone number
## we are supposed to be looking only at the text proper
## is shedhml doing its job?

# Version string for this module.
VERSION = '0.4'
# Revision marker; the "$Revision: ... $" form looks like a VCS keyword
# expansion -- TODO confirm it is still maintained automatically.
REVISION = "$Revision: 23000 $"

# defaults
# Verbose output is enabled by default (cf. the -v/--verbose option in the
# usage text).
VERBOSE = True

# Starts out empty; presumably populated with area-code data further down
# in the module -- verify against the rest of the file.
AREA_CODES = dict()
Beispiel #6
0
\t-t, --tier:\tsee wataux.markettiers, integer 1-99, no default
\t-r, --region:\t4-digit region code or 5-char region desig, see wataux.marketregions, no default
'''

import sys
import getopt
# import trbotdb
import watdb
import util
import re
import web
web.config.debug = False

# import logging
from watlog import watlog
logger = watlog("wat.boutique")
logger.info('wat.boutique initialized')

# Version string for this module.
VERSION = '0.6'
# Revision marker; the "$Revision: ... $" form looks like a VCS keyword
# expansion -- TODO confirm it is still maintained automatically.
REVISION = "$Revision: 22999 $"

# defaults
# Verbose output is enabled by default.
VERBOSE = True

# Default source and application names.
SOURCE = 'backpage'
APPLICATION = 'escort'
# MARKET = 'LAX'
# No default market; the commented-out line above shows an example value.
MARKET = None
# CODE takes MARKET's value at import time (None); rebinding MARKET later
# does not change CODE.
CODE = MARKET
# No default city or site key.
CITY = None
SITEKEY = None
Beispiel #7
0
# _orig_interpolate=web.db._interpolate # needed?

def _interpolate_ignore_dollar_sign(format):
    # print "enter _interpolate_ignore_dollar_sign"
    return [(0, format)]

web.db._interpolate = _interpolate_ignore_dollar_sign

## end v 0.10

from web.db import sqlquote
from collections import defaultdict

from watlog import watlog
logger = watlog("wat.watdb")
logger.info('wat.watdb initialized')

# WE HAVE TWO ENGINES: MySQLdb and webpy
# note that MySQLdb is a zipped python egg and needs to be able to
# uncompress into a python-eggs directory.  For generality when
# running as a web server, I placed a directive in httpd.conf, but one
# could also do something like
# os.environ['PYTHON_EGG_CACHE'] = '/tmp/python-eggs'
import MySQLdb
import web
web.config.debug = False

# Version string for this module.
VERSION = '0.10'
# Revision marker; the "$Revision: ... $" form looks like a VCS keyword
# expansion -- TODO confirm it is still maintained automatically.
REVISION = "$Revision: 21852 $"
# Verbose output is enabled by default.
VERBOSE = True