def main():
    """Copy the first 1000 jobs from the daily scrape into jobaly.job1000."""
    target_client = DbClient('localhost', 27017, "jobaly")
    source_client = DbClient('localhost', 27017, "jobaly_daily")

    source_coll = source_client.getCollection("daily_job_info_2014-06-16")
    target_coll = target_client.getCollection("job1000")

    # Transfer at most 1000 documents.
    copyCollection(source_coll, target_coll, 1000)
def __init__(self, window, size, options):
    """ Initialise and start the snackspace application """
    # Startup timestamp (seconds since epoch), e.g. for uptime reporting.
    self.inittime = int(time.time())
    self.options = options
    self.input_handler = InputHandler()
    # Task scheduler; poll the RFID reader every 500 ms, repeating.
    self.task_handler = TaskHandler(self)
    self.task_handler.add_function(self.rfid_task, 500, True)
    self.logger = logging.getLogger("snackspace")
    self.rfid = RFIDReader(self.options.rfid_port)
    self.is_fullscreen = True
    self.cursor_visible = False
    self.window_size = size
    self.screen_manager = ScreenManager(self, window, size)
    self.user = None  # no user logged in yet
    self.products = []  # products in the current transaction
    # Queue for replies coming back from the database thread.
    self.reply_queue = Queue.Queue()
    # Database access runs on its own daemon thread.
    self.dbaccess = DbClient(self.options.hostip, self.task_handler, self.db_state_callback)
    self.dbaccess.daemon = True
    self.dbaccess.start()
def main():
    """Page through job_lang_top_corps (starting at page 149) and process each page."""
    db_client = DbClient('localhost', 27017, "jobaly")
    collection = db_client.getCollection("job_lang_top_corps")
    info_collection = db_client.getCollection("jobinfo_lang_top_corps")
    getter = IndeedPageGetter(info_collection)

    page_size = 10
    page_no = 149
    page_limit = 10000
    find_sort = None
    find_spec = None

    while page_no <= page_limit:
        page = db_client.getPage(collection, find_spec, find_sort, page_size, page_no)
        getter.processPage(page, page_no)
        page_no += 1
        # A short page means the collection is exhausted.
        if page.count(with_limit_and_skip=True) < page_size:
            break
def main(): cities = [ 'MoutainView, CA', 'Seattle, WA', 'San Diego, CA', 'San Francisco, CA', 'Austin, TX', 'San Jose, CA', 'Portland, OR', ' New York, NY', 'Houston, TX', 'Boston, MA', 'Davis, CA', 'Palo Alto, CA', ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA' ] cities = [ 'Austin, TX', 'San Jose, CA', 'Portland, OR', ' New York, NY', 'Houston, TX', 'Boston, MA', 'Davis, CA', 'Palo Alto, CA', ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA' ] _pageSize = 25 _fromage = 30 _location = 94040 _radius = 25 _query = "software engineer" collectionName = "job_se_10city" indeedClient = ApiClient(_query, _pageSize, _fromage, _location, _radius) # client.getPage(0) dbClient = DbClient('localhost', 27017, "jobaly") collection = dbClient.getCollection(collectionName) for city in cities: print "-----prcoss city %s -------" % city indeedClient.processCity(collection, city)
def main():
    """Load the results-calendar CSV and push each row into the database."""
    frame = pd.read_csv(RESULTS_CALENDER_PATH, skiprows=1,
                        names=RESULTS_CALENDER_COLUMNS)
    db_client = DbClient()
    # Round-trip through JSON/YAML to turn the frame into plain dict records.
    records = yaml.safe_load(frame.to_json(orient='records'))
    for record in records:
        # Normalise e.g. "16 Jun 2014" -> "2014-06-16".
        parsed = datetime.datetime.strptime(record['result_date'], "%d %b %Y")
        record['result_date'] = parsed.strftime('%Y-%m-%d')
        db_client.update_results_calender(record)
def main():
    """Run addField over the first 100 pages of jobinfo_se_top_corps."""
    db_client = DbClient('localhost', 27017, "jobaly")
    processor = CollectionPageProcessor(db_client, "jobinfo_se_top_corps")
    # Pages of 100 documents, ordered by _id for a stable traversal.
    processor.process(addField,
                      pageNo=1,
                      pageNum=100,
                      pageSize=100,
                      find_sort=[("_id", 1)])
def main():
    """Compute term document frequencies over jobinfo_se_top_corps."""
    stopwords = getStopWords("stopwords.txt")
    tf_getter = TfGetter(stopwords)
    db_client = DbClient('localhost', 27017, "jobaly")
    processor = CollectionPageProcessor(db_client, "jobinfo_se_top_corps")
    processor.process(tf_getter.processPage,
                      pageNo=1, pageNum=100, find_sort=[("_id", 1)])
    # Persist the document-frequency table for later IDF computation.
    printDict(tf_getter.term_num_docs, "term_idf.txt")
def getByCities(): cities = [ 'Austin, TX', 'San Jose, CA', 'Portland, OR', ' New York, NY', 'Houston, TX', 'Boston, MA', 'Davis, CA', 'Palo Alto, CA', ' Irvine, CA', 'Olathe, KS', 'Columbia, MD', ' Atlanta, GA' ] param = {"q": "software engineer", "fromage": "30"} collectionName = "job_se_10city" indeedClient = ApiClient(param) # client.getPage(0) dbClient = DbClient('localhost', 27017, "jobaly") collection = dbClient.getCollection(collectionName) for city in cities: print "-----prcoss city %s -------" % city indeedClient.processQuery(collection, "l", city)
def getByCorps(): print " --- get job by companies---" collectionName = "job_se_top_corps" param = {"q": "software engineer", "fromage": "30"} indeedClient = ApiClient(param) # client.getPage(0) dbClient = DbClient('localhost', 27017, "jobaly") collection = dbClient.getCollection(collectionName) corps = [] fileName = "topcorps.txt" with open(fileName, 'r') as the_file: for line in the_file: word = line.strip() if not len(word) == 0: corps.append(word) for corp in corps: q = indeedClient.buildQuery("software engineer", {"company": corp}) print "-----prcoss corp %s -------" % corp indeedClient.processQuery(collection, "q", q)
def getByLang(): print " --- get job by language and companies---" collectionName = "job_lang_top_corps" param = { "q" : "software engineer", "fromage" : "30" } lang_names = utils.loadArrayFromFile("pro_langs.txt") corps_names = utils.loadArrayFromFile("topcorps.txt") indeedClient= ApiClient( param ) # client.getPage(0) dbClient = DbClient('localhost', 27017, "jobaly") collection = dbClient.getCollection(collectionName) for corp in corps_names: for lang in lang_names: q = indeedClient.buildQuery(lang, {"company": corp }) print "-----prcoss corp %s with language %s -------" % (corp, lang) indeedClient.processQuery(collection, "q", q)
def main(): collectionName = "job_lang_top_corps" dbClient = DbClient('localhost', 27017, "jobaly") collection = dbClient.getCollection(collectionName) title_dict = {} for job in collection.find(): # print job["_id"], job["jobtitle"] title = job["jobtitle"] if title_dict.has_key(title): title_dict[title] += 1 else : title_dict[title] = 1 stat_file_name = "jobtitle_stat.txt" with open( stat_file_name , "w") as text_file: i = 0 for (key, value) in sorted(title_dict.iteritems(), key=operator.itemgetter(1), reverse = True): # print key, ":", value text_file.write("%s : %s \n" % (key.encode('utf8'),value)) i+=1 print i, " lines had been writen into file:", stat_file_name
def main():
    """Fan job pages out to a pool of JobGetter worker threads.

    NOTE(review): the original assigned collectionName/infoCollectionName
    twice in a row; the dead first assignments were removed.
    """
    db_client = DbClient('localhost', 27017, "jobaly")
    collection = db_client.getCollection("job_lang_top_corps")
    info_collection = db_client.getCollection("jobinfo_lang_top_corps")

    page_size = 20
    page_no = 1
    page_limit = 10000
    find_sort = None
    find_spec = None

    # Start 20 daemon workers pulling (page, page_no) tuples off the queue.
    work_queue = Queue.Queue()
    for _ in range(20):
        worker = JobGetter(work_queue, info_collection)
        worker.setDaemon(True)
        worker.start()

    while page_no <= page_limit:
        page = db_client.getPage(collection, find_spec, find_sort, page_size, page_no)
        work_queue.put((page, page_no))
        page_no += 1
        # A short page means the collection is exhausted.
        if page.count(with_limit_and_skip=True) < page_size:
            break

    # Block until every queued page has been processed by the workers.
    work_queue.join()
def main(backup_day):
    """Download, unzip and store NSE daily data for *backup_day*.

    backup_day is an ISO date string, e.g. "2014-06-16".
    """
    # URL / filename templates are module-level and are specialised in
    # place for the requested day. NOTE(review): this mutates the globals,
    # so calling main() twice in one process would re-format already
    # formatted strings - confirm intended single-shot use.
    global NSE_DAILY_DATA_URL, NSE_DAILY_DATA_CSV_ZIP_FILENAME, NSE_DAILY_DATA_CSV_FILENAME, DOWNLOADS_PATH
    year, month, _day = backup_day.split('-')[0], MONTH_NAMES[backup_day.split(
        '-')[1]], backup_day.split('-')[2]
    NSE_DAILY_DATA_URL = NSE_DAILY_DATA_URL.format(day=_day, month=month, year=year)
    NSE_DAILY_DATA_CSV_ZIP_FILENAME = NSE_DAILY_DATA_CSV_ZIP_FILENAME.format(
        day=_day, month=month, year=year)
    NSE_DAILY_DATA_CSV_FILENAME = NSE_DAILY_DATA_CSV_FILENAME.format(
        day=_day, month=month, year=year)
    logging.info("Started backup for the day: {}".format(backup_day))
    file_path = os.path.join(DOWNLOADS_PATH, NSE_DAILY_DATA_CSV_ZIP_FILENAME)
    # Download the zip only if it is not already cached locally.
    if not os.path.exists(file_path):
        if not os.path.exists(os.path.dirname(file_path)):
            os.makedirs(os.path.dirname(file_path))
        print NSE_DAILY_DATA_URL
        # Request is sent with custom headers from NSE_DAILY_DATA_HEADERS.
        req = urllib2.Request(NSE_DAILY_DATA_URL, headers=NSE_DAILY_DATA_HEADERS)
        page = urllib2.urlopen(req)
        with open(file_path, 'wb') as writer:
            writer.write(page.read())
    zf = zipfile.ZipFile(file_path)
    df = pd.read_csv(zf.open(NSE_DAILY_DATA_CSV_FILENAME), skiprows=1,
                     names=NSE_DAILY_DATA_COLUMNS)
    # Drop the placeholder column named 'empty' from NSE_DAILY_DATA_COLUMNS.
    del df['empty']
    dbClient = DbClient()
    # Round-trip through JSON/YAML to turn the frame into plain dict records.
    for i, record in enumerate(yaml.safe_load(df.to_json(orient='records'))):
        record['added_on'] = datetime.datetime.strptime(
            backup_day, "%Y-%m-%d").strftime('%Y-%m-%d %H:%M:%S')
        # Absolute and percentage change versus the previous close.
        record['p_change'] = round(
            (float(record['close']) - float(record['prev_close'])), 2)
        record['change_percent'] = round(
            (float(record['close']) - float(record['prev_close'])) /
            float(record['prev_close']) * 100, 2)
        dbClient.update_nse_daily_data(record)
from pymongo import MongoClient
import exceptions
from settings import AUTH_TOKEN
import os
import urllib

# Flask app backed by the "processed_meetings" Mongo database.
app = Flask(__name__)
app.config.update(
    MONGODB_HOST='mongo',
    MONGODB_PORT='27017',
    MONGODB_DB='processed_meetings',
)
CORS(app)
dbclient = DbClient(MongoClient("mongo", 27017).processed_meetings)


@app.before_request
def verify_token():
    """Reject any request lacking a valid ?token= query parameter.

    Returning None lets the request proceed; /health is always allowed.
    """
    # TODO make this a little more secure
    if "/health" in request.url:
        return None
    url_token = request.args.get('token')
    if not url_token:
        return jsonify({"error": "Valid Token Required"}), 401
    elif url_token != AUTH_TOKEN:
        return jsonify({"error": "Invalid Token Provided"}), 401
    else:
        return None
# get server credentials from config configur = ConfigParser() configur.read(configPath) servermode = 'cloud' # Either 'cloud' or 'local' hostname = configur.get(servermode, 'host') password = configur.get(servermode, 'password') port = configur.getint(servermode, 'port') #initialization screen = Screen() analyzer = Analyzer() scene = Scene() db = DbClient(screen, analyzer, scene.handleUpdate, host=hostname, port=port, password=password) def setup(): sid = db.getActive() scene.setup(sid, db, analyzer, screen) def main(): # for layer in layers: # layer.draw(screen.ctx) # star.draw() #rms, bark, silent = analyzer.get() #screen.update(rms, bark, silent)
def main():
    """Main program handler: open a DB connection and run the queries."""
    with DbClient() as db_conn:
        execute_queries(db_conn)
def __init__(self):
    """Create the object and open its database client.

    The redundant trailing `pass` statement was removed.
    """
    self.dbclient = DbClient()
def main():
    """Main program handler: reset the schema (drop, then recreate tables)."""
    with DbClient() as db_conn:
        drop_tables(db_conn)
        create_tables(db_conn)
def main():
    """Main program handler: stage raw data, then load the final tables."""
    with DbClient() as db_conn:
        load_staging_tables(db_conn)
        insert_tables(db_conn)
def __init__(self):
    # Task scheduler that drives periodic callbacks for this object.
    self.task_handler = TaskHandler(self)
    # Database client wired to the task handler.
    # NOTE(review): the meaning of the leading boolean flag is not visible
    # from here - confirm against DbClient's constructor.
    self.dbaccess = DbClient(True, self.task_handler)