def execute_one() -> None:
    try:
        os.makedirs("./tmp/", exist_ok=True)
        db = DBHelper()
        job_id = db.get_import_job()
        if job_id is None:
            print("no job, wait")
            time.sleep(const.NO_JOB_WAIT)
            return
        try:
            job_doc = db.lock_import_job(job_id)
        except Exception as e:
            log("unable to lock, skip: ", e)
            # random backoff time
            time.sleep(0.01 * random.randint(1, 40))
            return
        try:
            download(job_doc)
            add_to_db(job_doc, db)
        except Exception as e:
            log("execution error: ", e)
            # release lock
            job_doc["lock_timestamp"] = 0
            job_doc.save()
    except Exception as e:
        log("unknown error: ", e)
    finally:
        log("clean up tmp folder")
def get_xxx_detail(text, table_name):
    dbHelp = DBHelper(user_config.db_host, user_config.db_port, user_config.db_user,
                      user_config.db_password, user_config.db_database)
    soup = BeautifulSoup(text, "html.parser")
    # If name is omitted and only attrs is given, find() matches any tag carrying that attribute.
    menu_lst_tag = soup.find(name="div", attrs={"class": "rstdtl-menu-lst"})
    # Searching one level down is enough.
    menu_head_content_tags = menu_lst_tag.findChildren(recursive=False)
    menu_type = ""
    for menu_head_content_tag in menu_head_content_tags:
        # print(menu_head_content_tag)
        if menu_head_content_tag.get("class")[0] == "rstdtl-menu-lst__heading":
            menu_type = my_util.getTagText(menu_head_content_tag)
            continue
        info = {}
        info["type"] = menu_type
        info["name"] = my_util.getTagText(
            menu_head_content_tag.find(
                name="p", attrs={"class": "rstdtl-menu-lst__menu-title"}))
        img_tag = menu_head_content_tag.find(name="img")
        img_href = img_tag.get("src") if img_tag else ""
        if img_href != "":
            info["img"] = down_file(img_href, img_href[img_href.rfind("/") + 1:])
        info["price"] = my_util.getTagText(
            menu_head_content_tag.find(
                name="p", attrs={"class": "rstdtl-menu-lst__price"}))
        info["description"] = my_util.getTagText(
            menu_head_content_tag.find(name="p", attrs={"class": "rstdtl-menu-lst__ex"}))
        # print(info)
        save_data(table_name, info, dbHelp)
    dbHelp.closeDB()
def get_host_ip(user_id, project_name, service_name):
    prj_id = Db.exec_one("select id from projects "
                         "where name='%s' and userID='%s'",
                         (project_name, user_id))
    return Db.exec_one("select IP from services "
                       "where name='%s' and projectID='%s'",
                       (service_name, prj_id))
def delete_all_services(user_name, project_name):
    prj_id = Db.exec_one("select id from projects "
                         "where name='%s' and userID = (select id from user where name = '%s')",
                         (project_name, user_name))
    if prj_id is None:
        raise Exception("Project does not exist for %s, %s" % (user_name, project_name))
    Db.exec_cmd("delete from services where projectID = '%s'", prj_id)
def get_list(user_id, project_name):
    prj_id = Db.exec_one("select id from projects "
                         "where name = '%s' and userID=(select id from user where name='%s')",
                         (project_name, user_id))
    if prj_id is None:
        raise Exception("Project does not exist for %s, %s" % (user_id, project_name))
    return Db.exec_list("select name from services where projectID='%s'", prj_id)
def get_list(user_id, start_index, count):
    """
    Return a page of a user's projects.
    If count <= 0, start_index is ignored and the full list is returned.
    """
    if count <= 0:
        return Db.exec_list("select name, url from projects "
                            "where userID = '%s'", user_id)
    else:
        return Db.exec_list("select name, url from projects "
                            "where userID = '%s' limit %s,%s",
                            (user_id, start_index, count))
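# A minimal usage sketch for the pagination helper above (not part of the original
# module): the user id and page size below are hypothetical, and Db is assumed to be
# the same helper used throughout this file. MySQL's "LIMIT start,count" returns
# `count` rows beginning at the zero-based offset `start`.
def _example_paging(user_id):
    page_size = 10
    first_page = get_list(user_id, 0, page_size)            # rows 0..9
    second_page = get_list(user_id, page_size, page_size)   # rows 10..19
    everything = get_list(user_id, 0, 0)                    # count <= 0 ignores start_index
    return first_page, second_page, everything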
def build_event_sm(events):
    d = DBHelper()
    d.recreate_tables()
    ts = 0
    event_i = 0
    sm = EventSM()
    for event in events:
        event_i += 1
        try:
            sm.digest(event)
        except Exception as e:
            print "Exception in event #%d: %s" % (event_i, str(e))
            raise e
    return sm
def get_party_detail(links):
    dbHelp = DBHelper(user_config.db_host, user_config.db_port, user_config.db_user,
                      user_config.db_password, user_config.db_database)
    for link in links:
        info = {}
        info["link"] = link
        text = get_html(link)
        if text == "EOF" or text == "ERR":
            print("failed to fetch: " + link)
            continue
        soup = BeautifulSoup(text, "html.parser")
        # If name is omitted and only attrs is given, find() matches any tag carrying that attribute.
        title_tag = soup.find(name="h3", attrs={"class": "course-dtl__course-title"})
        info["name"] = my_util.getTagText(title_tag)
        img_div_tag = soup.find(name="div", attrs={"class": "course-dtl__img"})
        img_tag = img_div_tag.find(name="img") if img_div_tag else None
        img_href = img_tag.get("src") if img_tag else ""
        if img_href != "":
            info["img"] = down_file(img_href, img_href[img_href.rfind("/") + 1:])
        desc_tag = soup.find(name="div", attrs={"class": "course-dtl__desc"})
        info["description"] = my_util.getTagText(desc_tag)
        table_tag = soup.find(
            name="table",
            attrs={"class": "c-table c-table--form course-dtl__data-table"})
        # Japanese table headers mapped to columns: course price, number of dishes,
        # allowed stay time, course contents. The keys must stay in Japanese because
        # they are matched against the scraped page text.
        info_map = {
            "コース料金": "price",
            "品数": "num",
            "滞在可能時間": "free_time",
            "コース内容": "content"
        }
        if table_tag:
            trs = table_tag.select("tbody tr")
            for tr in trs:
                th = tr.find(name="th")
                th_text = my_util.getTagText(th)
                if th_text in info_map:
                    info[info_map.get(th_text)] = my_util.getTagText(tr.find(name="td"))
        # print(info)
        save_data("STORE_PARTY", info, dbHelp)
    dbHelp.closeDB()
def __init__(self):
    self.db_helper = DBHelper()
    self._init_counters()
    self._init_callbacks()
    self._alone_time = 0
    self._where_away = None
    self._away_ts = 0
def get_project_by_id(id):
    projects = DBHelper().fetch(u"SELECT * FROM projects WHERE id=%s;" % (id))
    if len(projects):
        return projects[0]
    return None
def get_requirements_by_date(project_id, base_date):
    return DBHelper().fetch(u" SELECT r.*"
                            u" FROM requirements r"
                            u" INNER JOIN projects p ON p.id = r.project_id"
                            u" WHERE p.id = %s"
                            u" AND r.added >= '%s'" % (project_id, base_date))
def insert_recommendation(project_id, risk_id, base_date, distance, sample, steps, type):
    DBHelper().execute(
        u" INSERT INTO recommendations"
        u" (project_id, risk_id, base_date, distance, sample, steps, type)"
        u" VALUES (%s, %s, '%s', %s, %s, %s, '%s');" %
        (project_id, risk_id, base_date, distance, sample, steps, type))
def get_risk_by_id(id):
    risks = DBHelper().fetch(u"SELECT * FROM risks WHERE id=%s;" % (id))
    if len(risks):
        return risks[0]
    return None
def add_import_job(start_date_s: str, end_date_s: str) -> None:
    start_date = date.fromisoformat(start_date_s)
    end_date = date.fromisoformat(end_date_s)
    db = DBHelper()
    while start_date <= end_date:
        date_str = "{},{},{}".format(start_date.year, start_date.month, start_date.day)
        cmd = config.curl_command_template.format(date_str, date_str).strip()
        data = {
            '_id': start_date.isoformat(),
            'curl_cmd': cmd,
            'finished': False,
            'lock_timestamp': 0,
            'work_node': None,
            'total_num': None,
            'import_num': None
        }
        if start_date.isoformat() not in db.client["import_job"]:
            print(start_date)
            db.client["import_job"].create_document(data)
        start_date += timedelta(days=1)
def get_risks_by_date(project_id, base_date):
    return DBHelper().fetch(u" SELECT r.*"
                            u" FROM risks r"
                            u" INNER JOIN projects p ON p.code = r.code"
                            u" WHERE p.id = %s"
                            u" AND r.added >= '%s'" % (project_id, base_date))
def main(argv):
    params = argv
    if len(params) == 1:
        params = IrishNameParser().names
    else:
        params = argv[1:]
    counter = 0
    profile_downloader = PublicProfileDownloader()
    for param in params:
        profile_downloader.download(param)
        counter += NUM
        if counter > 500:
            time.sleep(120)
            counter -= 500
    DBHelper.commitAndClose()
def get_requirement_by_id(id):
    requirements = DBHelper().fetch(
        u"SELECT * FROM requirements WHERE id=%s;" % (id))
    if len(requirements):
        return requirements[0]
    return None
def delete_recommendations(distance, sample, steps, type):
    DBHelper().execute(
        u" DELETE "
        u" FROM recommendations "
        u" WHERE CAST(distance AS DECIMAL(5,3))=CAST(%s AS DECIMAL(5,3))"
        u" AND CAST(sample AS DECIMAL(5,3))=CAST(%s AS DECIMAL(5,3))"
        u" AND CAST(steps AS DECIMAL(5,3))=CAST(%s AS DECIMAL(5,3))"
        u" AND type='%s';" % (distance, sample, steps, type))
def get_project_by_rand():
    projects = DBHelper().fetch(
        u"SELECT * FROM projects ORDER BY RAND() LIMIT 1;")
    if len(projects):
        return projects[0]
    return None
def harvest_single_user(maintask: MainTask, api: tweepy.API,
                        doc: cloudant.document, db: DBHelper) -> bool:
    max_id = 0
    min_id_last_round = None
    counter = 0
    while True:
        kwargs = {
            "user_id": doc["_id"],
            "since_id": int(doc["last_harvest_tweet_id"]) + 1,
            "include_rts": "false"
        }
        if min_id_last_round is not None:
            kwargs["max_id"] = str(min_id_last_round - 1)
        try:
            status_list = api.user_timeline(**kwargs)
        except Exception as e:
            maintask.log("user tweets: twitter api error, backoff", e)
            return False
        ids = []
        if len(status_list) == 0:
            break
        for status in status_list:
            counter += 1
            t_json = status._json
            t_id = int(t_json["id_str"])
            max_id = max(max_id, t_id)
            ids.append(t_id)
            db.add_tweet(t_json)
        min_id_last_round = min(ids)
        maintask.log("user tweets: ids from ", min(ids), "to", max(ids))
    doc["last_harvest_tweet_id"] = str(
        max(max_id, int(doc["last_harvest_tweet_id"])))
    doc["last_harvest"] = int(time.time())
    doc.save()
    maintask.log("user tweets: got tweets", counter)
    return True
def login(email, password):
    password = get_md5_value(password)
    user = DBHelper.get_user(email)
    if user:
        if user.password == password:
            session['email'] = email
            return True
        else:
            return False
    return False
def get_requirements_distance(req_a_id, req_b_id):
    distance = DBHelper().fetch(u" SELECT * "
                                u" FROM requirements_distance "
                                u" WHERE req_a_id=%s "
                                u" AND req_b_id=%s;" % (req_a_id, req_b_id))
    if len(distance):
        return distance[0]
    return None
def __init__(self):
    self.db = DBHelper()
    self.bot = Bot()
    self.controls = {}
    self.bot.message_loop(
        {
            'chat': self.handle,
            'callback_query': self.on_callback
        }
    )
def dump(filename: str, keep_auth: bool = False) -> None:
    data = {}
    dbh = DBHelper()
    dbs = list(filter(lambda x: x[0] != '_', dbh.client.all_dbs()))
    for db in dbs:
        dds = dbh.client[db].design_documents()
        if not keep_auth:
            dds = list(filter(lambda x: x['id'] != '_design/auth', dds))
        data[db] = dds
    with open(filename, 'w') as outfile:
        json.dump(data, outfile)
def get_project_by_rand():
    project = DBHelper().fetch(u" SELECT *"
                               u" FROM projects"
                               u" WHERE id < 18"
                               u" ORDER BY rand()"
                               u" LIMIT 1")
    if len(project):
        return project[0]
    return None
def delete(user_id, password, project_name):
    # todo: should container removal be handled by the upper layer?
    srv_list = DBModel.Service.get_list(user_id, password, project_name)
    if len(srv_list) != 0:
        for service_name in srv_list:
            ip = DBModel.Service.get_host_ip(user_id, project_name, service_name)
            if ip == '-':
                continue
            else:
                # rm this container
                # cli = Client(base_url=url, version=config.c_version)
                # full_name = username + config.split_mark + project_name + config.split_mark + service_name
                # if container_exists(cli, full_name):
                #     logs = logs + full_name + '\n' + cli.logs(container=full_name) + '\n'
                #     cli.stop(container=full_name)
                #     cli.remove_container(container=full_name)
                pass
    Db.exec_cmd("delete from service where project='%s'", project_name)
    Db.exec_cmd("delete from project where name='%s'", project_name)
def get_risks_distance(risk_a_id, risk_b_id):
    distance = DBHelper().fetch(u" SELECT * "
                                u" FROM risks_distance "
                                u" WHERE risk_a_id=%s "
                                u" AND risk_b_id=%s;" % (risk_a_id, risk_b_id))
    if len(distance):
        return distance[0]
    return None
def sync_tasks(data):
    '''
    insert or update database
    '''
    try:
        list_id = data[0]['list_id']
        d = DBHelper.fetchall(
            "select task_id from {0} where list_id = {1}".format(
                tb_tasks, list_id))
        d2 = [o['id'] for o in data]
        _delete_task = [
            ddd['task_id'] for ddd in d if ddd['task_id'] not in d2
        ]
        for dx in _delete_task:
            print 'now delete..'
            sql = "delete from %s where task_id = '%s'" % (tb_tasks, dx)
            print sql
            DBHelper.delete(sql)
    except IndexError, e:
        print 'index error ,msg:{}'.format(e)
        print 'data:{}'.format(data)
def __init__(self, reactor, pool, init_url, conf, use_pool=False):
    self.logger = logging.getLogger("")
    self.reactor = reactor
    self.pool = pool
    self._parse_conf(conf)
    self.db_helper = DBHelper(conf)
    self.url_dedup = URLDedup(conf)
    self.cleaner = lxml.html.clean.Cleaner(style=True, scripts=True,
                                           page_structure=False,
                                           safe_attrs_only=False)
    self.html_parser = lxml.html.HTMLParser(encoding='utf-8')
    self.init_url = init_url
    self.use_pool = use_pool
    self.flow_control = TimeFlowControl(1, 60)
def process_a_image(url: str, db: DBHelper, tweet_id: str) -> Dict[str, Any]:
    with tempfile.NamedTemporaryFile(dir=TMP_FOLDER, delete=True, suffix=".jpg") as tmpf:
        res = requests.get(url + ":small")
        if res.status_code != 200:
            raise Exception("unable to download image")
        content = res.content
        tmpf.write(content)
        tmpf.flush()
        # run yolo
        lock.acquire()
        try:
            results = detector.detect(tmpf.name)
        finally:
            lock.release()
        db.add_tweet_image_with_yolo(url, results, content, tweet_id)
        return results
def getPublicProfiles(limit=1000):
    path = 'user_raw'
    profile_paths = []
    for f in DBHelper.getNotRDFedFileName(limit):
        file_path = join(path, f)
        profile_paths.append(file_path)
        # parser = ProfileParser(file_path)
        # profile = parser.parseHtml()
        # profiles.append(profile)
    # return profiles
    return profile_paths
def get_projects_non_processed(distance, sample, steps, type):
    return DBHelper().fetch(
        u" SELECT p.*"
        u" FROM projects p"
        u" WHERE p.id NOT IN("
        u" SELECT project_id"
        u" FROM recommendations"
        u" WHERE CAST(distance AS DECIMAL(5,1)) = %s"
        u" AND CAST(sample AS DECIMAL(5,1)) = %s"
        u" AND CAST(steps AS DECIMAL(5,1)) = %s"
        u" AND type = '%s'"
        u" )"
        u" ORDER BY p.id ASC" % (distance, sample, steps, type))
def delete_user_and_projects(user_name):
    """
    Delete a user together with all of the user's projects and their services.
    """
    # todo: should deleting the projects and services be the caller's responsibility,
    #       or be executed as a transaction?
    # todo: merge the multiple SQL commands below?
    user_id = Db.exec_one("select id from user where name='%s'", user_name)
    Db.exec_cmd("delete from services where projectID in "
                "(select id from projects where userID='%s')", user_id)
    Db.exec_cmd("delete from projects where userID='%s'", user_id)
    Db.exec_cmd("delete from user where id='%s'", user_id)
def harvest_twitter_tweet_process_meta_update() -> None:
    db = DBHelper()
    count = 0
    for doc in db.client["harvest_twitter_tweet"]:
        if "locked" in doc["process_meta"]:
            # old version
            doc["process_meta"] = {'lock_timestamp': 0, 'processed': False}
            doc.save()
            count += 1
            if count % 100 == 0:
                print(count)
    print("finished.", count)
def add_to_db(job_doc: cloudant.document, db: DBHelper) -> None:
    total_num = 0
    import_num = 0
    with open('./tmp/twitter.json', 'r') as f:
        # skip things like "{"total_rows":3877777,"offset":805584,"rows":["
        f.readline()
        for l in f:
            # skip last line
            if l.strip() == ']}':
                continue
            try:
                # load one doc
                data = json.loads(l.rstrip(",\r\n "))
                data = data["doc"]
                data.pop("_id", None)
                data.pop("_rev", None)
            except Exception as e:
                log("unknown parse error, skip: ", e)
                continue
            total_num += 1
            if db.add_tweet_import(data):
                import_num += 1
            if total_num % const.JOB_UPDATE_PER_TWEET == 0:
                print("t: ", total_num, "i", import_num)
                # update doc, mainly to check whether someone else has taken over
                try:
                    job_doc["total_num"] = total_num
                    job_doc["import_num"] = import_num
                    job_doc.save()
                except Exception as e:
                    log("lock conflict: ", e)
                    return
    # complete
    try:
        job_doc["total_num"] = total_num
        job_doc["import_num"] = import_num
        job_doc["finished"] = True
        job_doc.save()
        log("finished")
    except Exception as e:
        log("unable to finish: ", e)
        return
class OrderDAO():
    def __init__(self):
        self.db_helper = DBHelper()
        self.db_helper.open_conn()

    def __del__(self):
        self.db_helper.close_conn()

    def query_all_order(self):
        '''
        Query all orders.
        :return: a list of Order entities, or None
        '''
        order_list = []
        sql = 'SELECT * FROM orders LIMIT 0,10'
        result = self.db_helper.do_query(sql)
        if not result:
            print('query result is empty')
            return None
        for row in result:
            order_id = row[0]
            cust_id = row[1]
            if row[4]:
                products_num = int(row[4])
            else:
                products_num = 0
            if row[5]:
                amt = float(row[5])
            else:
                amt = 0
            order_list.append(Order(order_id, cust_id, products_num, amt))
        return order_list

    def query_by_id(self, id):
        sql = 'select * from orders WHERE order_id = %s' % (id)
        result = self.db_helper.do_query(sql)
        if not result:
            print('query result is empty')
            return None
        result = result[0]
        order_id = result[0]
        cust_id = result[1]
        if result[4]:
            products_num = int(result[4])
        else:
            products_num = 0
        if result[5]:
            amt = float(result[5])
        else:
            amt = 0
        order = Order(order_id, cust_id, products_num, amt)
        return order
def dump_all(dump_dir: str) -> None:
    data = {}
    dbh = DBHelper()
    dbs = list(filter(lambda x: x[0] != '_', dbh.client.all_dbs()))
    url_obj = furl(config.couchdb_host)
    url_obj.username = config.couchdb_user
    url_obj.password = config.couchdb_auth_token
    url = url_obj.url
    for db in dbs:
        _url = url + db + "/_all_docs?include_docs=true&attachments=true"
        print(_url)
        cmd = r"""curl "{}" -G -o "{}" """
        cmd = cmd.format(_url, os.path.join(dump_dir, db + ".json"))
        exit_code = subprocess.call(cmd, shell=True)
        if exit_code != 0:
            raise Exception("unable to download")
def sync_lists():
    '''
    insert or update database
    :return data: list data, json, ref https://developer.wunderlist.com/documentation/endpoints/list
    '''
    data = get_lists()
    print data
    d = DBHelper.fetchall("select list_id from %s" % tb_lists)
    d2 = [o['id'] for o in data]
    _delete_list = [ddd['list_id'] for ddd in d if ddd['list_id'] not in d2]
    for dx in _delete_list:
        print 'now delete..'
        sql = "delete from %s where list_id = '%s'" % (tb_lists, dx)
        print sql
        DBHelper.delete(sql)
    for o in data:
        q = DBHelper.fetchall("select * from %s"
                              " where list_id = '%s'" % (tb_lists, o['id']))
        # updated_at = dateutil.parser.parse(o['updated_at']).date()  # deprecated
        updated_at = datetime.now().date()
        created_at = dateutil.parser.parse(o['created_at']).date()
        if q:
            q = q[0]
            if str(q['updated_at']) == str(updated_at):
                print 'no update!'
            else:
                print '%s update now...' % o['title']
                update_sql = "update %s set title='%s', created_at = '%s', updated_at='%s', content='%s' where list_id='%s'" % (
                    tb_lists, o['title'], created_at, updated_at, json.dumps(o), o['id'])
                DBHelper.update(update_sql)
        else:
            print 'insert now...'
            sql = '''insert into %s (title,created_at,updated_at,content,list_id) values('%s','%s','%s', '%s','%s')''' % (
                tb_lists, o['title'], created_at, updated_at, json.dumps(o), o['id'])
            print sql
            DBHelper.update(sql)
    print 'success sync lists'
    return data
def tweet_data_melb_time_update() -> None:
    db = DBHelper()
    count = 0
    for doc in tqdm.tqdm(db.client["tweet_data"],
                         total=db.client["tweet_data"].doc_count()):
        if "created_at_melb_time" not in doc["data"]:
            # old version
            time = doc["data"]["created_at"]
            melb_time = datetime.strptime(time, '%a %b %d %H:%M:%S %z %Y')\
                .replace(tzinfo=timezone.utc)\
                .astimezone(pytz.timezone('Australia/Melbourne'))
            doc["data"]["created_at_melb_time"] = \
                [melb_time.year, melb_time.month, melb_time.day,
                 melb_time.hour, melb_time.minute, melb_time.second]
            doc.save()
            count += 1
            if count % 100 == 0:
                print(count)
    print("finished.", count)
def handle_tweet_media(tweet_json: Dict[str, Any], worker: Worker,
                       db: DBHelper) -> Optional[List[Dict[str, Any]]]:
    try:
        res = []
        if "extended_entities" not in tweet_json or \
                "media" not in tweet_json["extended_entities"]:
            return []
        media = tweet_json["extended_entities"]["media"]
        for img in media:
            try:
                img_url: str = img["media_url_https"]
                if img_url.startswith("https://pbs.twimg.com/") and img_url.endswith(".jpg"):
                    worker.log("handle_tweet_media: image", img_url)
                    tmp = db.get_tweet_image_with_yolo(img_url)
                    if tmp is not None:
                        res.append(tmp)
                        continue
                    # handle image
                    try:
                        yolo_res = process_a_image(img_url, db, tweet_json["id_str"])
                        res.append({"url": img_url, "yolo": yolo_res})
                    except Exception as e:
                        worker.log("handle_tweet_media: process a image error", e)
                        return None
            except Exception as e:
                worker.log("handle_tweet_media: single media error", e)
    except Exception as e:
        worker.log("handle_tweet_media: unknown error", e)
        return []
    return res
def run(num, file_name):
    rg = RG(file_name)
    profile_paths = getPublicProfiles(limit=num)
    try:
        for path in profile_paths:
            parser = ProfileParser(path)
            profile = parser.parseHtml()
            # print profile.extra_profile_list
            # Utils.putExtraProfilesIntoDB(profile.extra_profile_list)
            rg.add(profile)
            DBHelper.dataSetRDF(profile.file_name, rdf=1)
    except Exception:
        traceback.print_exc()
        rg.save(format='xml', file_name=file_name)
        rg.close()
        DBHelper.commitAndClose()
    else:
        rg.save(format='xml', file_name=file_name)
        rg.close()
        DBHelper.commitAndClose()
def get_company_profile(self, url, company_name):
    file_path = CPD.downloadByUrl(url, company_name)
    parser = CPP(file_path)
    company_profile = parser.parseHtml().content
    DBHelper.dataAddEntry(company_profile['file_name'], url, exist=1, type='COMPANY')
    return company_profile
def add_experience_triple(self, profile, person):
    for experience in profile.experience_list:
        if profile.city is None:
            if 'city' in experience:
                self.set_profile_city(person, profile, experience['city'])
        if 'job_title' in experience:
            job_title = experience['job_title']
            job_title = self.position_helper(job_title)
            term = BNode()
            self.graph_add(term, RDF.type, self.schema.Position)
            self.graph_add(term, self.schema.occupation, Literal(job_title))
            try:
                if experience['from'] and self.check_datetime_format(experience['from']):
                    self.graph_add(term, self.schema.from_value,
                                   Literal(experience['from'], datatype=XSD.date))
            except KeyError:
                pass
            try:
                if experience['to']:
                    if self.check_datetime_format(experience['to']):
                        self.graph_add(term, self.schema.to_time,
                                       Literal(experience['to'], datatype=XSD.date))
                    elif experience['to'].lower() == 'current' or experience['to'].lower() == 'now':
                        self.graph_add(term, self.schema.to_time,
                                       Literal('now', datatype=XSD.string))
            except KeyError:
                pass
            if 'company_name' in experience:
                company_name = experience['company_name']
                company_name = self.company_name_helper(company_name)
                company = self.schema.get_term(company_name)
                self.graph_add(company, RDFS.label,
                               Literal(company_name, datatype=XSD.string))
                # we need to define this company
                if company_name not in self.companies:
                    self.graph_add(company, RDF.type, self.schema.Organization)
                    self.companies.add(company_name)
                    # add city info
                    cities = self.get_cities_by_company_name(company_name)
                    for city in cities:
                        self.graph_add(company, self.schema.city, self.schema.get_term(city))
                    if profile.city is None:
                        if cities:
                            self.set_profile_city(person, profile, cities[0])
                    # extra processing is required if the company profile page is available
                    if 'company_url' in experience:
                        company_profile = self.get_company_profile(experience['company_url'],
                                                                   company_name)
                        if 'Founded' in company_profile:
                            self.graph_add(company, self.schema.formation_year,
                                           Literal(company_profile['Founded'], datatype=XSD.gYear))
                        if 'Company Size' in company_profile:
                            mini, maxi = self.get_company_size(company_profile['Company Size'])
                            self.graph_add(company, self.schema.from_value,
                                           Literal(mini, datatype=XSD.integer))
                            self.graph_add(company, self.schema.to_time,
                                           Literal(maxi, datatype=XSD.integer))
                        if 'Type' in company_profile:
                            self.graph_add(company, self.schema.organization_type,
                                           Literal(company_profile['Type'], datatype=XSD.string))
                        if 'Industry' in company_profile:
                            self.graph_add(company, self.schema.industry,
                                           self.schema.get_term(company_profile['Industry']))
                        DBHelper.dataSetRDF(company_profile['file_name'], rdf=1)
                self.graph_add(company, self.schema.has_position, term)
            self.graph_add(person, self.schema.works_as, term)
def get(user_id):
    data = Db.exec_cmd("select net from info where name='%s'", user_id)
    # todo: check tuple usage
    return data[0]
def exists(user_name, project_name):
    prj = Db.exec_one("select 1 from projects "
                      "where name='%s' and userID = (select id from user where name = '%s')",
                      (project_name, user_name))
    return prj is not None
def __init__(self, conf):
    self.db_helper = DBHelper(conf)
    self.parser = HTMLParser(encoding='utf-8', remove_comments=True,
                             remove_blank_text=True)
    self.cleaner = lxml.html.clean.Cleaner(style=True, scripts=True,
                                           page_structure=False,
                                           safe_attrs_only=False,
                                           comments=True, javascript=True)
config = Configurations()

# Apply configurations
env_lang = config.get_language()
set_proto_lang(env_lang)
set_msg_lang(env_lang)

# Get network IP addr
if cctv_url == '':
    cctv_url = 'http://'
    tempstr = subprocess.check_output('dig +short myip.opendns.com @resolver1.opendns.com', shell=True)
    # str.replace returns a new string, so reassign the result
    tempstr = tempstr.replace('\r', '')
    tempstr = tempstr.replace('\n', '')
    cctv_url += tempstr.strip()
    cctv_url += ':' + cctv_port
    print 'Found external IP = ' + cctv_url

# Initialize DB
dbhelper = DBHelper(MYSQL_USER, MYSQL_PASS, MYSQL_DB)
dbhelper.connect()
dbhelper.checktables()

# Connect serial
t_ser = SerialThread(recv_queue, send_queue, incoming_cmd_callback)
t_ser.connect()

# Make worker thread
t_dev = DeviceManagerThread(dbhelper, recv_queue, send_queue, device_thread_callback)

# Start main loop
if __name__ == '__main__':
    try:
        # Start worker thread
        t_dev.load_devices()
        t_dev.load_noti()
        t_dev.load_macro()
def get_info(user_id, project_name):
    # todo: name is already known; is url the git repo URL?
    return Db.exec_one("select name, url from projects where userID = '%s' and name = '%s'",
                       (user_id, project_name))
class EventSM:
    AWAKE = "awake"
    PUTTING_TO_SLEEP = "put to sleep"
    SLEEPING = "sleeping"

    def __init__(self):
        self.db_helper = DBHelper()
        self._init_counters()
        self._init_callbacks()
        self._alone_time = 0
        self._where_away = None
        self._away_ts = 0

    def _init_counters(self):
        self.state = EventSM.AWAKE
        self.last_rec_ts = 0
        self.accum_sleeping_time = 0
        self.accum_put_to_sleep_time = 0
        self.days = set()

    def _init_callbacks(self):
        self.cb = {
            'putting to sleep': self._put_to_sleep,
            'failed putting to sleep': self._failed_to_put_to_sleep,
            'slept': self._fell_asleep,
            'woke up': self._woke_up,
            'megi left': self._alone,
            'megi\'s back': self._not_alone,
            'away': self._away,
            'back home': self._back_home
        }

    def _add_sleep_state(self, old_state, end_ts, new_state):
        success = not (old_state == EventSM.PUTTING_TO_SLEEP and new_state == EventSM.AWAKE)
        length_secs = end_ts - self.last_rec_ts
        self.db_helper.add_sleep_state(old_state, self.last_rec_ts, length_secs, success)
        self.last_rec_ts = end_ts
        self.state = new_state
        return length_secs

    def _put_to_sleep(self, ts, event):
        if self.state != EventSM.AWAKE:
            raise Exception('Cannot put to sleep a baby that is not awake...')
        self._add_sleep_state(EventSM.AWAKE, ts, EventSM.PUTTING_TO_SLEEP)

    def _failed_to_put_to_sleep(self, ts, event):
        if self.state != EventSM.PUTTING_TO_SLEEP:
            raise Exception('Cannot fail to put baby to sleep without trying first...')
        s = self._add_sleep_state(EventSM.PUTTING_TO_SLEEP, ts, EventSM.AWAKE)
        self.accum_put_to_sleep_time += s

    def _fell_asleep(self, ts, event):
        if self.state != EventSM.PUTTING_TO_SLEEP:
            raise Exception('Baby cannot sleep without being put to sleep...')
        s = self._add_sleep_state(EventSM.PUTTING_TO_SLEEP, ts, EventSM.SLEEPING)
        self.accum_put_to_sleep_time += s

    def _woke_up(self, ts, event):
        if self.state != EventSM.SLEEPING:
            raise Exception('Cannot wake up without sleeping first...')
        s = self._add_sleep_state(EventSM.SLEEPING, ts, EventSM.AWAKE)
        self.accum_sleeping_time += s

    def _alone(self, ts, event):
        if self._alone_time != 0:
            raise Exception('Already alone, cannot be left alone again...')
        self._alone_time = ts

    def _not_alone(self, ts, event):
        if self._alone_time == 0:
            raise Exception('Cannot be not alone without being left alone first...')
        t = ts - self._alone_time
        self._alone_time = 0
        self.db_helper.add_time_alone(ts, t)

    def _away(self, ts, event):
        if self._where_away is not None:
            if self._away_ts == 0:
                raise Exception('Cannot go away again without leaving first...')
            self.db_helper.add_time_away(self._away_ts, ts - self._away_ts, self._where_away)
        self._where_away = event.get_subtype()
        self._away_ts = ts

    def _back_home(self, ts, event):
        if self._away_ts == 0 or self._where_away is None:
            raise Exception('Cannot get back home without leaving...')
        self.db_helper.add_time_away(self._away_ts, ts - self._away_ts, self._where_away)
        self._where_away = None
        self._away_ts = 0

    def digest(self, event):
        self.db_helper.add_event(event)
        event_date = event.get_date()
        event_ts = event.get_ts()
        event_type = event.get_type()
        event_subtype = event.get_subtype()
        if self.last_rec_ts == 0:
            self.last_rec_ts = event_ts
        self.days.add(event_date)
        if not self.cb.has_key(event_type):
            return
        cb = self.cb[event_type]
        if type(cb) == dict:
            cb = cb[event_subtype]
        cb(event_ts, event)

    def get_state(self):
        return self.state

    def get_different_days(self):
        return len(self.days)

    def get_sleep_time(self):
        return self.accum_sleeping_time

    def get_put_to_slee_time(self):
        return self.accum_put_to_sleep_time
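# A small usage sketch for EventSM (not part of the original code): events are assumed
# to expose get_date/get_ts/get_type/get_subtype as digest() requires, mirroring how
# build_event_sm() drives the state machine elsewhere in this codebase.
def _example_event_sm(events):
    sm = EventSM()
    for event in events:  # events must be in chronological order
        sm.digest(event)
    # accumulated totals are available once all events are digested
    return sm.get_state(), sm.get_sleep_time(), sm.get_different_days()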
def create(user_id, volume_path):
    # assumes info rows are keyed by user name; the original "insert ... where" was not valid SQL
    Db.exec_cmd("insert into info(name, volume) values('%s', '%s')",
                (user_id, volume_path))
def create(user_name, service_name, machine_ip, project_name):
    prj_id = Db.exec_one("select id from projects "
                         "where name='%s' and userID in (select id from user where name = '%s')",
                         (project_name, user_name))
    Db.exec_cmd("insert into services(name, projectID, IP) values('%s', %s, '%s')",
                (service_name, prj_id, machine_ip))
def add_user(user_name, email):
    Db.exec_cmd("insert into user(name, email) values('%s', '%s')",
                (user_name, email))
class SeedMiner(object):
    def __init__(self, conf):
        self.db_helper = DBHelper(conf)
        self.parser = HTMLParser(encoding='utf-8', remove_comments=True,
                                 remove_blank_text=True)
        self.cleaner = lxml.html.clean.Cleaner(style=True, scripts=True,
                                               page_structure=False,
                                               safe_attrs_only=False,
                                               comments=True, javascript=True)

    def read_html(self, object_id, url):
        # object_id = 'index.html'
        try:
            fin = open('../data/mining_page/' + object_id)
        except IOError:
            return None
        body = fin.read()
        fin.close()
        # body = body.decode('gbk').encode('utf-8')
        tree = document_fromstring(body, parser=self.parser)
        tree = remove_display_none(tree)
        tree = self.cleaner.clean_html(tree)
        tree.make_links_absolute(url)
        return tree

    def maybe_hub(self, url, tree):
        if self.match_filter_url(url):
            return False, []
        block, matched_a, paths = self.get_hub_block(url, tree)
        tree = self.remove_p_aside_a(block, tree, matched_a)
        content_tree = self.get_readability_content(url, tree)
        content = unicode(content_tree.text_content().strip())
        content = re.sub(ur'\s', u'', content)
        chinese_content = filter_string(content, False, True, True)
        a_content = sum([len(a.text.strip()) for a in matched_a])
        ratio = len(chinese_content) * 1.0 / (a_content or 0.001)
        print 'url:%s matched_a:%d match content/link:%f' % (url, len(matched_a), ratio)
        print len(chinese_content), content.encode('utf-8')
        # import pdb;pdb.set_trace()
        if len(matched_a) > 20 and len(chinese_content) < 200 and ratio < 0.2:
            return True, paths
        else:
            return False, paths

    def get_readability_content(self, url, tree):
        body = lxml.html.tostring(tree)
        doc = Document(body)
        content = doc.summary(True)
        content_tree = fromstring(content, parser=self.parser)
        return content_tree

    def get_hub_block(self, url, tree):
        a_elements = valid_a_elements(tree.xpath('//a'), url)
        visited_a = set()
        all_a = set(a_elements)
        long_a = set([a for a in a_elements if a.text and len(a.text.strip()) >= 10])
        block = []
        max_div = 2
        max_depth = 8
        min_link_number = 4
        for start_a in long_a:
            if start_a in visited_a:
                continue
            path = '/a'
            iter_node = start_a
            div_count = 0
            loop_flag = True
            for _ in xrange(max_depth):
                if not loop_flag:
                    break
                if div_count > max_div or iter_node.tag == 'body':
                    break
                iter_node = iter_node.getparent()
                if iter_node is None:
                    break
                if iter_node.tag in BLOCK_TAG and len(iter_node.getchildren()) > 1:
                    div_count += 1
                    sibling = iter_node.xpath('.' + path)
                    if len(sibling) >= min_link_number and \
                            all([x in all_a for x in sibling]):
                        long_a_sibling = [x for x in sibling if x in long_a]
                        block.append((iter_node, path, long_a_sibling))
                        [visited_a.add(x) for x in sibling]
                        loop_flag = False
                path = '/' + iter_node.tag + path
        matched_a = [a for a in long_a if a in visited_a]
        paths = []
        for node, path, long_a in block:
            paths.append(get_html_path(node) + path)
        print len(block)
        # import pdb;pdb.set_trace()
        return block, matched_a, paths

    def remove_p_aside_a(self, block, tree, matched_a):
        matched_a = set(matched_a)
        for node, path, long_a in block:
            for e in node.iter():
                text = e.text or ''
                text = text.strip()
                if len(text) < 100 and e not in matched_a:
                    e.text = ''
        return tree

    URL_FILTER_RX = re.compile('''news.xinhuanet.com/video/.+/c_\d+.htm''')

    def match_filter_url(self, url):
        if self.URL_FILTER_RX.search(url):
            return True
        else:
            return False

    def test(self):
        fout = open('../data/mining_result.txt', 'w')
        for obj in self.db_helper.get_some_mining_task(0, 180000):
            url = obj.get('url')
            _id = str(obj.get('_id'))
            tree = self.read_html(_id, url)
            if tree is not None:
                try:
                    flag, paths = self.maybe_hub(url, tree)
                    aline = [str(flag), url, str(paths)]
                    fout.write('\t'.join(aline) + '\n')
                except KeyboardInterrupt:
                    sys.exit(1)
                except:
                    print "ERROR!"
                    traceback.print_exc()
            else:
                fout.write('\n')
        fout.close()
def get_machine_list():
    return Db.exec_list("select ip from machine")
def get(user_id):
    return Db.exec_list("select volume from info where name='%s'", user_id)
def add_machine_list(ip_list):
    for ip in ip_list:
        Db.exec_cmd("insert into machine(ip) values('%s')", ip)
def downloadMoreProfiles(limit=3000):
    downloader = PublicProfileDownloader()
    urls = DBHelper.getNotExistFileNames(limit=limit)
    for url in urls:
        downloader.downloadAndAnalyze(url, analysis=False)
def create(user_id, net_id):
    # todo: check tuple usage
    # assumes info rows are keyed by user name; the original "insert ... where" was not valid SQL
    Db.exec_cmd("insert into info(name, net) values('%s', '%s')",
                (user_id, net_id))
from conf import TOKEN, DB_NAME
from db_helper import DBHelper

BTN_TODAY, BTN_TOMORROW, BTN_MONTH, BTN_REGION, BTN_DUA = (
    '⌛️ Bgun', '⏳ Erta', "📅 To'liq taqvim", '🇺🇿 Mintaqalar', '🤲 Duo')

main_buttons = ReplyKeyboardMarkup(
    [[BTN_TODAY], [BTN_TOMORROW, BTN_MONTH], [BTN_REGION], [BTN_DUA]],
    resize_keyboard=True)

STATE_REGION = 1
STATE_CALENDAR = 2

user_region = dict()
db = DBHelper(DB_NAME)


def region_buttons():
    regions = db.get_regions()
    buttons = []
    tmp_b = []
    for region in regions:
        tmp_b.append(
            InlineKeyboardButton(region['name'], callback_data=region['id']))
        if len(tmp_b) == 2:
            buttons.append(tmp_b)
            tmp_b = []
    # keep the last (unpaired) button when the region count is odd
    if tmp_b:
        buttons.append(tmp_b)
    return buttons
def get_machine(index):
    # todo: what is this?
    # todo: used for random scheduling?
    return Db.exec_one("select ip from machine limit %s,1" % index)