Example #1
0
def main():
    """Hand a daily source collection and the ``job1000`` target to copyCollection.

    Both databases are opened through ``DbClient`` on localhost:27017
    (``jobaly`` as the target, ``jobaly_daily`` as the source), and
    ``copyCollection`` is called with a size of 1000.
    """
    host, port = 'localhost', 27017

    destination_client = DbClient(host, port, "jobaly")
    origin_client = DbClient(host, port, "jobaly_daily")

    origin_coll = origin_client.getCollection("daily_job_info_2014-06-16")
    destination_coll = destination_client.getCollection("job1000")

    copyCollection(origin_coll, destination_coll, 1000)
Example #2
0
    def __init__(self, window, size, options):
        """Initialise and start the snackspace application.

        Args:
            window: Passed straight through to ``ScreenManager``.
            size: Stored as ``self.window_size`` and passed to ``ScreenManager``.
            options: Stored as ``self.options``; ``rfid_port`` and ``hostip``
                are read from it below.
        """

        # Start-up time as a whole-second Unix timestamp.
        self.inittime = int(time.time())

        self.options = options

        self.input_handler = InputHandler()

        # The task handler receives this (still partially initialised) object.
        self.task_handler = TaskHandler(self)
        # NOTE(review): presumably "call rfid_task every 500 ms, repeating" —
        # confirm the meaning of 500 and True against TaskHandler.add_function.
        self.task_handler.add_function(self.rfid_task, 500, True)

        self.logger = logging.getLogger("snackspace")

        self.rfid = RFIDReader(self.options.rfid_port)

        # Initial display state flags.
        self.is_fullscreen = True
        self.cursor_visible = False

        self.window_size = size
        self.screen_manager = ScreenManager(self, window, size)

        # No user and no products at start-up.
        self.user = None
        self.products = []

        self.reply_queue = Queue.Queue()

        # NOTE(review): daemon/start() suggest DbClient is a threading.Thread
        # subclass running in the background — confirm against its definition.
        self.dbaccess = DbClient(self.options.hostip, self.task_handler,
                                 self.db_state_callback)
        self.dbaccess.daemon = True
        self.dbaccess.start()
Example #3
0
def main():
    """Feed pages of ``job_lang_top_corps`` to an IndeedPageGetter.

    Paging starts at page 149 with 10 items per page and stops after the
    first page that reports fewer than a full page of items, or once page
    10000 has been handled.
    """
    db = DbClient('localhost', 27017, "jobaly")
    jobs = db.getCollection("job_lang_top_corps")
    info_getter = IndeedPageGetter(db.getCollection("jobinfo_lang_top_corps"))

    page_size = 10
    last_page = 10000
    current = 149
    while current <= last_page:
        page = db.getPage(jobs, None, None, page_size, current)
        info_getter.processPage(page, current)
        current += 1
        # A short page means there is nothing further to fetch.
        if page.count(with_limit_and_skip=True) < page_size:
            break
Example #4
0
def main():
    cities = [
        'MoutainView, CA', 'Seattle, WA', 'San Diego, CA', 'San Francisco, CA',
        'Austin, TX', 'San Jose, CA', 'Portland, OR', ' New York, NY',
        'Houston, TX', 'Boston, MA', 'Davis, CA', 'Palo Alto, CA',
        ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA'
    ]

    cities = [
        'Austin, TX', 'San Jose, CA', 'Portland, OR', ' New York, NY',
        'Houston, TX', 'Boston, MA', 'Davis, CA', 'Palo Alto, CA',
        ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA'
    ]

    _pageSize = 25
    _fromage = 30
    _location = 94040
    _radius = 25
    _query = "software engineer"

    collectionName = "job_se_10city"
    indeedClient = ApiClient(_query, _pageSize, _fromage, _location, _radius)
    # client.getPage(0)
    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    for city in cities:
        print "-----prcoss city %s -------" % city
        indeedClient.processCity(collection, city)
Example #5
0
def main():
    """Read the results-calendar CSV and pass each row to the database client.

    Each row's ``result_date`` is re-formatted from ``%d %b %Y`` to
    ``%Y-%m-%d`` before ``update_results_calender`` is called with it.
    """
    frame = pd.read_csv(RESULTS_CALENDER_PATH,
                        skiprows=1,
                        names=RESULTS_CALENDER_COLUMNS)
    client = DbClient()
    # Round-trip through JSON to get a plain list of row dictionaries.
    rows = yaml.safe_load(frame.to_json(orient='records'))
    for row in rows:
        parsed = datetime.datetime.strptime(row['result_date'], "%d %b %Y")
        row['result_date'] = parsed.strftime('%Y-%m-%d')
        client.update_results_calender(row)
Example #6
0
def main():
    """Run ``addField`` through a CollectionPageProcessor on ``jobinfo_se_top_corps``.

    The processor is asked for pages 1..100 of 100 items each, sorted by
    ascending ``_id``.
    """
    processor = CollectionPageProcessor(
        DbClient('localhost', 27017, "jobaly"), "jobinfo_se_top_corps")
    paging = {
        "pageNo": 1,
        "pageNum": 100,
        "pageSize": 100,
        "find_sort": [("_id", 1)],
    }
    processor.process(addField, **paging)
Example #7
0
def main():
    """Process ``jobinfo_se_top_corps`` with a TfGetter and dump its term counts.

    Stop words are loaded from ``stopwords.txt``; after paging, the
    getter's ``term_num_docs`` is written to ``term_idf.txt`` via
    ``printDict``.
    """
    term_getter = TfGetter(getStopWords("stopwords.txt"))

    processor = CollectionPageProcessor(
        DbClient('localhost', 27017, "jobaly"), "jobinfo_se_top_corps")
    paging = {"pageNo": 1, "pageNum": 100, "find_sort": [("_id", 1)]}
    processor.process(term_getter.processPage, **paging)

    printDict(term_getter.term_num_docs, "term_idf.txt")
Example #8
0
def getByCities():

    cities = [
        'Austin, TX', 'San Jose, CA', 'Portland, OR', ' New York, NY',
        'Houston, TX', 'Boston, MA', 'Davis, CA', 'Palo Alto, CA',
        ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA'
    ]

    param = {"q": "software engineer", "fromage": "30"}

    collectionName = "job_se_10city"
    indeedClient = ApiClient(param)
    # client.getPage(0)
    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    for city in cities:
        print "-----prcoss city %s -------" % city
        indeedClient.processQuery(collection, "l", city)
Example #9
0
def getByCorps():
    print " --- get job by companies---"
    collectionName = "job_se_top_corps"
    param = {"q": "software engineer", "fromage": "30"}
    indeedClient = ApiClient(param)
    # client.getPage(0)
    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    corps = []
    fileName = "topcorps.txt"
    with open(fileName, 'r') as the_file:
        for line in the_file:
            word = line.strip()
            if not len(word) == 0:
                corps.append(word)

    for corp in corps:
        q = indeedClient.buildQuery("software engineer", {"company": corp})
        print "-----prcoss corp %s -------" % corp
        indeedClient.processQuery(collection, "q", q)
Example #10
0
def getByLang():
    
    print " --- get job by language and companies---"
    collectionName = "job_lang_top_corps"
    param = { "q" : "software engineer", 
               "fromage" : "30"    }    
               
    lang_names = utils.loadArrayFromFile("pro_langs.txt")
    corps_names = utils.loadArrayFromFile("topcorps.txt")
    
    indeedClient= ApiClient( param )
    # client.getPage(0)
    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    
    
    for corp in corps_names:
       for lang in lang_names:
           q = indeedClient.buildQuery(lang, {"company": corp })
           print "-----prcoss corp %s with language %s -------" % (corp, lang) 
           indeedClient.processQuery(collection, "q", q)
Example #11
0
def main(): 
    collectionName = "job_lang_top_corps"
    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    
    title_dict = {}
    for job in collection.find():
        # print job["_id"], job["jobtitle"]
        title =  job["jobtitle"]
        if title_dict.has_key(title): 
            title_dict[title] += 1
        else :
            title_dict[title] = 1
    
    stat_file_name =  "jobtitle_stat.txt"  
    with open( stat_file_name , "w") as text_file:   
        i = 0 
        for (key, value) in sorted(title_dict.iteritems(), key=operator.itemgetter(1), reverse = True):
        #     print key, ":", value 
             text_file.write("%s : %s \n" % (key.encode('utf8'),value)) 
             i+=1
        print i, " lines had been writen into file:", stat_file_name
Example #12
0
def main():
    """Queue pages of ``job_lang_top_corps`` for a pool of JobGetter workers.

    Twenty ``JobGetter`` workers are started as daemons on a shared queue
    (each also receives the ``jobinfo_lang_top_corps`` collection). Pages
    of 20 items are then queued as ``(page, pageNo)`` tuples until a page
    reports fewer than 20 items or page 10000 is reached, after which the
    function blocks on ``queue.join()``.
    """
    # An earlier target pair, "job_se_10city" / "jobinfo_se_10city", used to
    # be assigned here and was immediately overwritten by the names below.
    collectionName = "job_lang_top_corps"
    infoCollectionName = "jobinfo_lang_top_corps"

    dbClient = DbClient('localhost', 27017, "jobaly")
    collection = dbClient.getCollection(collectionName)
    infoCollection = dbClient.getCollection(infoCollectionName)

    pageSize = 20
    pageNum = 10000
    find_sort = None
    find_spec = None

    threadNum = 20
    queue = Queue.Queue()
    for _ in range(threadNum):
        # NOTE(review): setDaemon/start suggest JobGetter is a
        # threading.Thread subclass — confirm against its definition.
        worker = JobGetter(queue, infoCollection)
        worker.setDaemon(True)
        worker.start()

    pageNo = 1
    while pageNo <= pageNum:
        page = dbClient.getPage(collection, find_spec, find_sort, pageSize,
                                pageNo)
        queue.put((page, pageNo))
        pageNo += 1
        # A short page means there is nothing further to queue.
        if page.count(with_limit_and_skip=True) < pageSize:
            break

    # Wait for the workers to mark every queued page as done.
    queue.join()
0
def main(backup_day):
    global NSE_DAILY_DATA_URL, NSE_DAILY_DATA_CSV_ZIP_FILENAME, NSE_DAILY_DATA_CSV_FILENAME, DOWNLOADS_PATH
    year, month, _day = backup_day.split('-')[0], MONTH_NAMES[backup_day.split(
        '-')[1]], backup_day.split('-')[2]
    NSE_DAILY_DATA_URL = NSE_DAILY_DATA_URL.format(day=_day,
                                                   month=month,
                                                   year=year)
    NSE_DAILY_DATA_CSV_ZIP_FILENAME = NSE_DAILY_DATA_CSV_ZIP_FILENAME.format(
        day=_day, month=month, year=year)
    NSE_DAILY_DATA_CSV_FILENAME = NSE_DAILY_DATA_CSV_FILENAME.format(
        day=_day, month=month, year=year)
    logging.info("Started backup for the day: {}".format(backup_day))
    file_path = os.path.join(DOWNLOADS_PATH, NSE_DAILY_DATA_CSV_ZIP_FILENAME)
    if not os.path.exists(file_path):
        if not os.path.exists(os.path.dirname(file_path)):
            os.makedirs(os.path.dirname(file_path))
        print NSE_DAILY_DATA_URL
        req = urllib2.Request(NSE_DAILY_DATA_URL,
                              headers=NSE_DAILY_DATA_HEADERS)
        page = urllib2.urlopen(req)
        with open(file_path, 'wb') as writer:
            writer.write(page.read())
    zf = zipfile.ZipFile(file_path)
    df = pd.read_csv(zf.open(NSE_DAILY_DATA_CSV_FILENAME),
                     skiprows=1,
                     names=NSE_DAILY_DATA_COLUMNS)
    del df['empty']
    dbClient = DbClient()
    for i, record in enumerate(yaml.safe_load(df.to_json(orient='records'))):
        record['added_on'] = datetime.datetime.strptime(
            backup_day, "%Y-%m-%d").strftime('%Y-%m-%d %H:%M:%S')
        record['p_change'] = round(
            (float(record['close']) - float(record['prev_close'])), 2)
        record['change_percent'] = round(
            (float(record['close']) - float(record['prev_close'])) /
            float(record['prev_close']) * 100, 2)
        dbClient.update_nse_daily_data(record)
from pymongo import MongoClient
import exceptions
from settings import AUTH_TOKEN
import os
import urllib

# Flask application object for this module.
app = Flask(__name__)

# MongoDB settings stored in the Flask config.
# NOTE(review): MONGODB_PORT is the string '27017' here, while the
# MongoClient call below passes the integer 27017 — confirm which form any
# reader of this config entry expects.
app.config.update(
    MONGODB_HOST='mongo',
    MONGODB_PORT='27017',
    MONGODB_DB='processed_meetings',
)
# Apply CORS to the app with no extra options.
CORS(app)

# Built from literal host/port/database values, not from the app.config
# entries above; the two must be kept in sync by hand.
dbclient = DbClient(MongoClient("mongo", 27017).processed_meetings)


@app.before_request
def verify_token():
    """Require a valid ``token`` query parameter on every non-health request.

    Returns:
        ``None`` to let the request proceed (health-check paths, or a token
        equal to ``AUTH_TOKEN``); otherwise a ``(json_response, 401)`` tuple
        describing why the request was rejected.
    """
    # TODO make this a little more secure
    # Look at the path only. Matching against the full URL also matched the
    # query string, so any request could skip authentication simply by
    # including "/health" in a query parameter.
    if "/health" in request.path:
        return None
    url_token = request.args.get('token')
    if not url_token:
        return jsonify({"error": "Valid Token Required"}), 401
    if url_token != AUTH_TOKEN:
        return jsonify({"error": "Invalid Token Provided"}), 401
    return None
Example #15
0
# get server credentials from config
# The file at configPath is parsed and the section named by servermode
# supplies the host, password and port used for the DbClient below.
configur = ConfigParser()
configur.read(configPath)
servermode = 'cloud'  # Either 'cloud' or 'local'
hostname = configur.get(servermode, 'host')
password = configur.get(servermode, 'password')
port = configur.getint(servermode, 'port')  # parsed as an integer

#initialization
# Module-level singletons shared by setup() and main().
screen = Screen()
analyzer = Analyzer()
scene = Scene()
# The client receives the screen, the analyzer and the scene's handleUpdate
# bound method, plus the connection settings read above.
db = DbClient(screen,
              analyzer,
              scene.handleUpdate,
              host=hostname,
              port=port,
              password=password)


def setup():
    """Set up the scene with the id returned by ``db.getActive()``.

    The module-level ``db``, ``analyzer`` and ``screen`` objects are passed
    along to ``scene.setup``.
    """
    active_id = db.getActive()
    scene.setup(active_id, db, analyzer, screen)


def main():
    # NOTE(review): every statement below is commented out in this excerpt,
    # so the function has no executable body as shown — confirm whether the
    # remainder of the body lies outside this view.
    # for layer in layers:
    #   layer.draw(screen.ctx)
    # star.draw()
    #rms, bark, silent = analyzer.get()
    #screen.update(rms, bark, silent)
Example #16
0
def main():
    """Open a DbClient as a context manager and run ``execute_queries`` on it."""
    with DbClient() as client:
        execute_queries(client)
 def __init__(self):
     """Store a default-constructed DbClient as ``self.dbclient``."""
     self.dbclient = DbClient()
Example #18
0
def main():
    """Drop the tables, then create them again, inside one DbClient context."""
    with DbClient() as client:
        # Order matters: drop first, then create.
        drop_tables(client)
        create_tables(client)
Example #19
0
def main():
    """Load the staging tables, then run the inserts, inside one DbClient context."""
    with DbClient() as client:
        # Staging tables are loaded before the insert step runs.
        load_staging_tables(client)
        insert_tables(client)
Example #20
0
 def __init__(self):
     """Create the task handler and a DbClient that shares it.

     The DbClient is constructed with ``True`` as its first argument and
     the task handler as its second.
     """
     self.task_handler = TaskHandler(self)
     self.dbaccess = DbClient(True, self.task_handler)