def redirect(inpt):
    """Resolve a '|'-separated sequence of path terms to a single file
    under the module-level ``root`` directory.

    Walks ``root`` and collects every file whose normalized relative path
    contains ``inpt``'s terms as a subsequence (via the module helpers
    ``expand_list`` and ``subsequence``).

    Returns the absolute, case-normalized path of the first match, or ""
    when nothing matches.  Ambiguous matches are logged and the first one
    wins.
    """
    candidates = []
    # NOTE(review): assumes ``inpt`` is a byte string (Python 2 ``str``);
    # lower() before decode() mirrors the original behaviour.
    terms = inpt.strip().lower().decode("utf-8").split("|")
    for dirpath, dirnames, filenames in os.walk(root):
        for filename in filenames:
            rel_path = os.path.normpath(
                os.path.normcase(os.path.relpath(dirpath + "/" + filename, root)))
            # Split on both separators: normcase may have produced either
            # "\\" (Windows) or "/" (POSIX) in rel_path.
            path_terms = rel_path.lower().split("\\")
            path_terms = expand_list([i.split("/") for i in path_terms])
            if subsequence(path_terms, terms):
                candidates.append(rel_path)
    if not candidates:
        _logger.warning('Not found "%s"' % (inpt))
        return ""
    if len(candidates) > 1:
        # Fixed typo in the log message ("Ambiguious" -> "Ambiguous").
        _logger.warning('Ambiguous for "%s":' % (inpt))
        for candidate in candidates:
            _logger.warning("\t%s\n" % (candidate))
    ret = os.path.normcase(os.path.abspath(root + "/" + candidates[0]))
    _logger.info('Redirect "%s" to "%s"' % (inpt, ret))
    return ret
def ask_google(keywords, needed, proxy=None, callback=None, terminate=None, sleep_min=1, sleep_max=3):
    """Scrape Google search results for ``keywords``.

    The visible portion quotes the query, caps ``needed`` at Google's
    1000-result limit, configures an optional HTTP(S) proxy on the
    browser, and retries the initial fetch up to five times.

    NOTE(review): ``results``, ``current_page``, ``callback``,
    ``terminate`` and the sleep bounds are unused in this span — the
    function appears to continue beyond what is shown here, so only the
    visible logic is touched.
    """
    keywords = urllib.quote_plus(keywords)  # Python 2 urllib API
    random.seed()
    # Google never serves more than 1000 results per query.
    if needed > 1000:
        needed = 1000
    br = get_browser()
    if proxy is not None:
        br.set_proxies({'http': proxy, 'https': proxy})
    results = set()
    url = 'http://www.google.com/search?q=%s' % keywords
    current_page = 1
    # Kick off searching: retry the first request up to 5 times before
    # giving up.
    fail_num = 0
    _logger.info('searching [%s] for %d results from %s' % (keywords, needed, url))
    while fail_num < 5:
        try:
            response = br.open(url, timeout=5.0)
            break
        # "except Exception as err" is valid on Python 2.6+ and 3; the
        # original comma form is a syntax error on Python 3.
        except Exception as err:
            _logger.error('initial fetching failed(%s): %s' % (url, err))
            fail_num += 1
def train(self, outpath):
    """Run ``self.iter`` training passes over ``self.train_file`` and
    dump every non-zero feature weight to ``outpath``.

    Output format: one "<key-joined-by-#> <weight>" pair per line.
    Weights are reset before training (``self.v`` is replaced).

    NOTE(review): weights are written with only one decimal place
    ("%.1f") — confirm that this precision loss is intentional.
    """
    self.v = defaultdict(float)
    for iteration in xrange(self.iter):
        _logger.info("training iteration %d" % (iteration + 1))
        self.train_iteration(self.train_file)
    with open(outpath, "w") as outfile:
        for key, weight in self.v.items():
            if weight != 0.0:
                outfile.write("%s %.1f\n" % ('#'.join(key), weight))
def lock(self, top_bound=None):
    """Lock and return the next job at the head of this queue.

    Calls the ``lock_head`` database function; returns the job row as a
    dict (via the RealDict cursor), or None when the queue is empty.
    ``top_bound`` defaults to ``self.top_bound``.
    """
    with log_yield(measure="queue.lock"):
        if top_bound is None:
            top_bound = self.top_bound
        with self.conn_adapter.connection.cursor(cursor_factory=LoggingRealDictCursor) as curs:
            curs.execute("SELECT * FROM lock_head(%s, %s)", [self.name, top_bound])
            if not curs.rowcount:
                return None
            job = curs.fetchone()
            # NOTE: JSON in args is parsed automatically
            # timestamptz columns are converted automatically to datetime
            if job["created_at"]:
                now = datetime.datetime.now(job["created_at"].tzinfo)
                ttl = now - job["created_at"]
                # BUG FIX: timedelta.microseconds is only the sub-second
                # component (0-999999); jobs queued for >= 1 second were
                # reported wrongly. total_seconds() covers the full span.
                _logger.info("measure#qc.time-to-lock=%sms source=%s" % (int(ttl.total_seconds() * 1000), self.name))
            return job
# this method will unlock the job in the queue.
def process(self, queue, job):
    """Execute one locked job and delete it from ``queue``.

    Failures are delegated to ``self.handle_failure``; if even the
    failure handler raises, the job is unlocked so another worker can
    retry it.  Processing time is logged in milliseconds.
    """
    start = datetime.datetime.now()
    finished = False
    try:
        self.call(job)
        queue.delete(job['id'])
        finished = True
    # "except Exception as e" is valid on Python 2.6+ and 3; the
    # original comma form is a syntax error on Python 3.
    except Exception as e:
        self.handle_failure(job, e)
        finished = True
    finally:
        if not finished:
            # call() AND handle_failure() both raised: release the lock
            # so the job is not stranded.
            queue.unlock(job['id'])
    ttp = datetime.datetime.now() - start
    # BUG FIX: timedelta.microseconds drops whole seconds; jobs taking
    # >= 1 second logged a wrong figure. total_seconds() is the real span.
    # NOTE(review): the collapsed original is ambiguous about whether this
    # logging sat inside the finally block — placed after it here.
    _logger.info("measure#qc.time-to-process=%s source=%s" % (int(ttp.total_seconds() * 1000), queue.name))

# Each job carries a dotted "method" string. We import the named module
# (relative to this worker's module when the path is relative) and invoke
# the attribute with the stored args.  (Comment updated: the old text
# described the original Ruby implementation's eval-based dispatch.)
def call(self, job):
    args = job['args']
    receiver_str, _, message = job['method'].rpartition('.')
    if receiver_str:
        module = importlib.import_module(receiver_str, self.__module__)
    else:
        # No module prefix: look the callable up on __main__.
        import __main__
        module = __main__
    getattr(module, message)(*args)

# This method will be called when an exception