def linux_ssh_login1(self):
    ssh = None
    try:
        logger.debug(self.ip + " Connecting...")
        # NOTE: pass1 is used as the username, pass2 as the password
        ssh = pexpect.spawn('ssh -p 22 %s@%s' % (self.pass1, self.ip))
        i = ssh.expect([
            'word:', 'continue connecting (yes/no)?', 'refused', 'fail',
            'time', pexpect.TIMEOUT
        ], timeout=8)
        if i == 1:
            # first connection: accept the host key, then wait for the prompt
            ssh.sendline('yes')
            ssh.expect('word:', timeout=3)
        if i >= 2:
            logger.error(self.ip + " Cannot reach the remote router!")
            ssh.close()
            return None
        ssh.sendline(self.pass2)
        i = ssh.expect(['[#$]', 'word:', pexpect.TIMEOUT], timeout=3)
        if i >= 1:
            logger.error(self.ip + " Wrong username or password!")
            ssh.close()
            return None
    except Exception as e:
        logger.error(self.ip + ' ' + str(e))
        if ssh is not None:
            ssh.close()
        return None
    else:
        logger.info(self.ip + " Logged in!")
        return ssh
def h3c_tel_login2(self):
    tel = None
    try:
        logger.debug(self.ip + " Connecting...")
        tel = pexpect.spawn('telnet %s' % self.ip)
        i = tel.expect(['word:', 'refused', 'fail', 'time', pexpect.TIMEOUT],
                       timeout=15)
        if i >= 1:
            # any non-prompt match means the host is unreachable
            logger.error(self.ip + " Cannot reach the remote router!")
            tel.close()
            return None
        tel.sendline(self.pass1)
        i = tel.expect([self.wait1, 'word:', pexpect.TIMEOUT], timeout=5)
        if i >= 1:
            logger.error(self.ip + " Invalid password!")
            tel.close()
            return None
        # switch to privileged mode on the H3C device
        tel.sendline('sup')
        tel.expect('ssword:', timeout=1)
        tel.sendline(self.pass2)
        i = tel.expect([self.wait1, 'ssword:', pexpect.TIMEOUT], timeout=3)
        if i >= 1:
            logger.error(self.ip + " Wrong super password!")
            tel.close()
            return None
    except Exception as e:
        logger.error(self.ip + ' ' + str(e))
        if tel is not None:
            tel.close()
        return None
    else:
        logger.info(self.ip + " Logged in!")
        return tel
def save_to_disk(self):
    timestr = time.strftime('%Y%m%d%H%M%S', time.localtime())
    logger.debug(self.ip + ' start to save config to disk')
    fname = self.name + '-' + timestr
    # hash the city name so it is safe to use as a directory name
    city = hashlib.md5((self.city).encode('UTF-8')).hexdigest()
    fpath = base_dir + r'/down/conf/%s/%s' % (city, self.name)
    if not os.path.exists(fpath):
        try:
            os.makedirs(fpath)
        except Exception as e:
            logger.error(str(e))
            return
    os.chdir(fpath)
    # save config; 'with' guarantees the file is closed even if open() fails
    try:
        with open(fname, 'w') as file:
            file.writelines(self.txt_conf)
    except Exception as e:
        logger.error(str(e))
    else:
        logger.info(self.ip + ' finish saving config!')
def push_to_prd(self):
    if self.trans_files == []:
        logger.warning('no data to push')
        return False
    ip = envi['prd_sftp_ip']
    port = envi['prd_sftp_port']
    usr = envi['prd_sftp_user']
    pwd = envi['prd_sftp_pass']
    src = envi['data_src_name']
    rdir = envi['prd_data_dir']
    target = None
    try:
        target = paramiko.Transport((ip, port))
        target.connect(username=usr, password=pwd)
        sftp = paramiko.SFTPClient.from_transport(target)
        # transfer via sftp put
        for f in self.trans_files:
            sftp.put(self.ldir + f, rdir + f)
        logger.info('pushing to prd finished')
    except Exception as e:
        logger.error(str(e))
        send_mail(src + ' auto sync failed', str(e))
        if target is not None:
            target.close()
        return False
    target.close()
    send_mail(src + ' auto sync succeeded',
              'please verify the *.tar.gz data files against the md5 file')
    return True
def run_word2vec(files, words, window):
    # map out-of-vocabulary tokens to the configured unknown-word symbol
    f = lambda x: x if x in words else config["unknow_word"]
    corpus = []
    for path in files:
        with open(path, encoding="utf-8", mode="r") as file:
            for line in file.readlines():
                # split("/") strips the tag from "token/tag" pairs in the train data
                words_line = [
                    w.split("/")[0] for w in patt.split(line.strip())
                ]
                words_line_with_unk = list(map(f, words_line))
                if len(words_line) < window:
                    # sentences shorter than the window are skipped
                    continue
                corpus.append([v + "_" for v in words_line_with_unk])
    logger.info("number of sentences for word2vec training: %s" % len(corpus))
    with open("data/train_word2vec.txt", encoding="utf-8", mode="w") as file:
        string = "\n".join([" ".join(sent) for sent in corpus])
        file.write(string)
    # train; low-frequency words are already mapped to unk, so min_count=1
    word2vec("data/word2vec.txt",
             corpus,
             embeding_size=256,
             min_count=1,
             window=window)
async def cookie2user(cookie_str):
    """
    Parse cookie and load user if cookie is valid.
    """
    if not cookie_str:
        return None
    try:
        L = cookie_str.split('-')
        if len(L) != 3:
            return None
        uid, expires, sha1 = L
        if int(expires) < time.time():
            return None
        user = await User.find(uid)
        if user is None:
            return None
        # recompute the signature and compare it with the one in the cookie
        s = '%s-%s-%s-%s' % (uid, user.passwd, expires, _COOKIE_KEY)
        if sha1 != hashlib.sha1(s.encode('utf-8')).hexdigest():
            logger.info('invalid sha1')
            return None
        user.passwd = '******'
        return user
    except Exception as e:
        logger.exception(e)
        return None
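# A minimal sketch of the cookie-producing counterpart implied by cookie2user
# above: the cookie format is "uid-expires-sha1(uid-passwd-expires-_COOKIE_KEY)".
# The function name, max_age parameter, and User attributes are assumptions
# inferred from the verification logic, not taken from this file.
def user2cookie(user, max_age):
    # expiry as a unix timestamp, stringified so it survives the '-' join
    expires = str(int(time.time() + max_age))
    s = '%s-%s-%s-%s' % (user.id, user.passwd, expires, _COOKIE_KEY)
    return '-'.join(
        [user.id, expires,
         hashlib.sha1(s.encode('utf-8')).hexdigest()])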
def pull_from_cmm(self):
    ip = envi['cmm_sftp_ip']
    port = envi['cmm_sftp_port']
    usr = envi['cmm_sftp_user']
    pwd = envi['cmm_sftp_pass']
    src = envi['data_src_name']
    p = envi['cmm_data_pattern']
    target = None
    try:
        target = paramiko.Transport((ip, port))
        target.connect(username=usr, password=pwd)
        sftp = paramiko.SFTPClient.from_transport(target)
        logger.debug("connected to cmm's sftp server")
        # find files to transfer: new remote files matching the pattern
        for rdir in envi['cmm_data_dir']:
            for f in sftp.listdir(rdir):
                if f not in self.loc_files and re.match(p, f) is not None:
                    self.trans_files.append(f)
                    # transfer via sftp get
                    sftp.get(rdir + f, self.ldir + f)
                    logger.debug('file: <' + str(f) + '> transferred')
        if self.trans_files == []:
            logger.warning('no data to pull')
            send_mail(src + ' not updated',
                      'please contact the data provider')
        else:
            logger.info('pulling finished: ' + str(self.trans_files))
    except Exception as e:
        logger.error(str(e))
        send_mail(src + ' auto sync failed', str(e))
        if target is not None:
            target.close()
        return False
    target.close()
    return True
def dell_tel_login1(self):
    tel = None
    try:
        logger.debug(self.ip + " Connecting...")
        tel = pexpect.spawn('telnet %s' % self.ip)
        i = tel.expect(['ogin:', 'refused', 'fail', 'time', pexpect.TIMEOUT],
                       timeout=5)
        if i >= 1:
            logger.error(self.ip + " Cannot reach the remote router!")
            tel.close()
            return None
        tel.sendline(self.pass1)  # username
        tel.expect('word:', timeout=2)
        tel.sendline(self.pass2)  # password
        i = tel.expect([self.wait2, 'ogin:', pexpect.TIMEOUT], timeout=5)
        if i >= 1:
            logger.error(self.ip + " Wrong username or password!")
            tel.close()
            return None
    except Exception as e:
        logger.error(self.ip + ' ' + str(e))
        if tel is not None:
            tel.close()
        return None
    else:
        logger.info(self.ip + " Logged in!")
        return tel
def logout(self, obj):
    if obj is None:
        return False
    time.sleep(1)
    obj.close()
    logger.info(self.ip + " Logged out")
    return True
def parse_data(http_url, conn):
    logger.info("requesting url: " + http_url)
    soup, date_str = get_html_content(http_url)
    if soup is not None:
        get_data(conn=conn, date_str=date_str, soup4=soup)
    else:
        logger.error("failed to fetch data, please check the network connection")
def parse_data(http_url, conn, year='', month=''):
    logger.info("requesting url: " + http_url)
    soup, date_str = get_html_content(http_url, year, month)
    # throttle requests so the site is not hit too quickly
    time.sleep(random.randint(2, 4))
    if soup is not None:
        get_data(conn=conn, date_str=date_str, soup4=soup)
    else:
        logger.error("failed to fetch data, please check the network connection")
async def parse_data(request):
    # 'handler' comes from the enclosing middleware factory that wraps this
    # coroutine (not shown in this snippet)
    if request.method == 'POST':
        if request.content_type.startswith('application/json'):
            request.__data__ = await request.json()
            logger.info('request json: %s' % str(request.__data__))
        elif request.content_type.startswith(
                'application/x-www-form-urlencoded'):
            request.__data__ = await request.post()
            logger.info('request form: %s' % str(request.__data__))
    return (await handler(request))
async def __call__(self, request):
    kw = None
    if (self._has_var_kw_arg or self._has_named_kw_args
            or self._required_kw_args):
        if request.method == 'POST':
            if not request.content_type:
                # aiohttp's HTTP exceptions take keyword-only text=
                return web.HTTPBadRequest(text='Missing Content-Type.')
            ct = request.content_type.lower()
            if ct.startswith('application/json'):
                params = await request.json()
                if not isinstance(params, dict):
                    return web.HTTPBadRequest(text='JSON body must be object.')
                kw = params
            elif ct.startswith('application/x-www-form-urlencoded'
                               ) or ct.startswith('multipart/form-data'):
                params = await request.post()
                kw = dict(**params)
            else:
                return web.HTTPBadRequest(text='Unsupported Content-Type: %s' %
                                          request.content_type)
        if request.method == 'GET':
            qs = request.query_string
            if qs:
                kw = dict()
                for k, v in parse.parse_qs(qs, True).items():
                    kw[k] = v[0]
    if kw is None:
        kw = dict(**request.match_info)
    else:
        if not self._has_var_kw_arg and self._named_kw_args:
            # remove all unnamed kw:
            copy = dict()
            for name in self._named_kw_args:
                if name in kw:
                    copy[name] = kw[name]
            kw = copy
        # check named arg:
        for k, v in request.match_info.items():
            if k in kw:
                logger.warning(
                    'Duplicate arg name in named arg and kw args: %s' % k)
            kw[k] = v
    if self._has_request_arg:
        kw['request'] = request
    # check required kw:
    if self._required_kw_args:
        for name in self._required_kw_args:
            if name not in kw:
                return web.HTTPBadRequest(text='Missing argument: %s' % name)
    logger.info('call with args: %s' % str(kw))
    try:
        r = await self._func(**kw)
        return r
    except APIError as e:
        return dict(error=e.error, data=e.data, message=e.message)
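# The _required_kw_args/_named_kw_args flags used by __call__ above must come
# from inspecting the handler's signature. A sketch of that inspection, under
# the assumption that keyword-only parameters are the "named kw args"; these
# helper names are illustrative, not taken from this file.
import inspect

def required_kw_args(fn):
    # keyword-only parameters without defaults are mandatory
    return tuple(name for name, p in inspect.signature(fn).parameters.items()
                 if p.kind == inspect.Parameter.KEYWORD_ONLY
                 and p.default is inspect.Parameter.empty)

def named_kw_args(fn):
    # all keyword-only parameters, with or without defaults
    return tuple(name for name, p in inspect.signature(fn).parameters.items()
                 if p.kind == inspect.Parameter.KEYWORD_ONLY)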
async def init(loop):
    await orm.create_pool(loop=loop, **configs.db)
    app = web.Application(
        loop=loop,
        middlewares=[logger_factory, auth_factory, response_factory])
    init_jinja2(app, filters=dict(datetime=datetime_filter))
    add_routes(app, 'handlers')
    add_static(app)
    srv = await loop.create_server(app.make_handler(), '127.0.0.1', 9000)
    logger.info('server started at http://127.0.0.1:9000...')
    return srv
def insert(conn, sql, param):
    """
    :param conn: open database connection
    :param sql: parameterized SQL statement
    :param param: parameters to bind into sql
    """
    # get a cursor for this statement
    cursor = conn.cursor()
    # execute and commit immediately
    cursor.execute(sql, param)
    conn.commit()
    logger.info("insert:" + str(param))
def add_route(app, fn):
    method = getattr(fn, '__method__', None)
    path = getattr(fn, '__route__', None)
    if path is None or method is None:
        raise ValueError('@get or @post not defined in %s.' % str(fn))
    # wrap plain functions so every registered handler is a coroutine
    if not asyncio.iscoroutinefunction(fn) and not inspect.isgeneratorfunction(
            fn):
        fn = asyncio.coroutine(fn)
    logger.info('add route %s %s => %s(%s)' %
                (method, path, fn.__name__,
                 ', '.join(inspect.signature(fn).parameters.keys())))
    app.router.add_route(method, path, RequestHandler(app, fn))
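# add_route reads fn.__method__ and fn.__route__, so handlers must be tagged
# by decorators along these lines. This @get is a sketch assumed from that
# contract; the project's actual decorator may differ.
import functools

def get(path):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kw):
            return func(*args, **kw)
        wrapper.__method__ = 'GET'
        wrapper.__route__ = path
        return wrapper
    return decorator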
async def auth(request):
    # 'handler' is supplied by the enclosing middleware factory (not shown)
    logger.info('check user: %s %s' % (request.method, request.path))
    request.__user__ = None
    cookie_str = request.cookies.get(COOKIE_NAME)
    if cookie_str:
        user = await cookie2user(cookie_str)
        if user:
            logger.info('set current user: %s' % user.email)
            request.__user__ = user
    # the management pages require a signed-in admin
    if request.path.startswith('/manage/') and (request.__user__ is None or
                                                not request.__user__.admin):
        return web.HTTPFound('/signin')
    return (await handler(request))
def create_dictionary(token_dict,
                      dic_path,
                      start=0,
                      sort=False,
                      min_count=None,
                      lower=False,
                      overwrite=True):
    """
    Build a vocabulary from token counts, assign each token an index, and
    save the mapping to a pkl file.

    Args:
        token_dict: dict, {token_1: count_1, token_2: count_2, ...}
        dic_path: path to save to (ending in .pkl)
        start: int, first index of the vocabulary, default 0
        sort: bool, sort by frequency; if False, sort by item
        min_count: int, minimum occurrences; rarer tokens are filtered out
        lower: bool, lowercase tokens
        overwrite: bool, overwrite an existing file
    Returns:
        voc size: int
    """
    if os.path.exists(dic_path) and not overwrite:
        return 0
    voc = dict()
    if sort:
        # sort by frequency, most frequent first
        token_list = sorted(token_dict.items(),
                            key=lambda d: d[1],
                            reverse=True)
        for i, item in enumerate(token_list):
            if min_count and item[1] < min_count:
                continue
            index = i + start
            key = item[0]
            voc[key] = index
    else:
        # sort by token
        if min_count:
            items = sorted([
                item[0] for item in token_dict.items() if item[1] >= min_count
            ])
        else:
            items = sorted([item[0] for item in token_dict.items()])
        for i, item in enumerate(items):
            item = item if not lower else item.lower()
            index = i + start
            voc[item] = index
    # write to file
    logger.info(voc)
    with open(dic_path, 'wb') as file:
        pickle.dump(voc, file)
    # number of distinct surviving tokens plus the start offset
    return len(voc.keys()) + start
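# Hypothetical usage of create_dictionary; the counts and path below are made
# up for illustration. With sort=True and min_count=2, 'sat' is filtered out,
# 'the' and 'cat' get indices 2 and 3, and the return value is len(voc) + start == 4.
counts = {'the': 120, 'cat': 3, 'sat': 1}
voc_size = create_dictionary(counts, 'data/voc.pkl', start=2, sort=True,
                             min_count=2)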
def parse_pdf(http_url):
    pdf_file = util_urllib.get_resp(http_url)
    output_string = read_pdf(pdf_file)
    pdf_file.close()
    output_string = output_string.replace(" ", "")
    # drop empty lines; filtering into a new list avoids mutating the list
    # while iterating over it
    target_value = [v for v in output_string.split("\n") if v != ""]
    # index of the "transport completion" line
    value_index = 0
    # index of the "total transport turnover" line
    total_turnover = 0
    # index of the "freight and mail transport volume" line
    goods_transport_volume = 0
    # index of the first number
    num_index = 0
    for i, val in enumerate(target_value):
        if "运输总周转量" in val:
            total_turnover = i
            continue
        if "货邮运输量" in val:
            goods_transport_volume = i
            continue
        if "运输完成情况" in val:
            value_index = i
            continue
        if total_turnover > 10 and "." in val:
            num_index = i
            break
    # the offsets below locate the domestic and international volumes relative
    # to the labels found above; they depend on the PDF's fixed layout
    if total_turnover > 10:
        inland_volume = target_value[num_index + int(
            (goods_transport_volume - total_turnover) / 2) + 1]
        foreign_volume = target_value[num_index + int(
            (goods_transport_volume - total_turnover) / 2) + 3]
    else:
        inland_volume = target_value[value_index + goods_transport_volume -
                                     total_turnover + 2]
        foreign_volume = target_value[value_index + goods_transport_volume -
                                      total_turnover + 4]
    logger.info("domestic freight and mail volume: %s, "
                "international freight and mail volume: %s" %
                (inland_volume, foreign_volume))
    return float(inland_volume), float(foreign_volume)
def get_voc_dict(files, start=2):
    """
    Count word frequencies; nothing useful can be learned from very
    low-frequency characters.
    :param files: list of training files in "token/tag" format
    :param start: first index to assign, default 2
    :return: vocabulary size
    """
    words = []
    for path in files:
        logger.info(path)
        with open(path, encoding="utf-8", mode="r") as file:
            string = file.read()
            # uses the train data; strip the tag from each "token/tag" pair
            words.extend([w.split("/")[0] for w in patt.split(string.strip())])
            del string
    counter = Counter(words)
    logger.info(counter)
    word_ids = defaultdict()
    for index, word in enumerate(counter.keys()):
        word_ids[word] = index + start
    logger.info(word_ids)
    logger.info(len(word_ids))
    pickle.dump(
        word_ids,
        open(
            "D:/projects_py/datagrand_extract_info/data/lstm_crf/train_test_word2ids.pkl",
            mode="wb"))
    return len(word_ids)
def urlopen(url, data=""):
    # retry on failure, up to reconnection_count times (default 3)
    for i in range(reconnection_count):
        try:
            if data == "":
                resp = request.urlopen(url=url, timeout=time_out)
            else:
                resp = request.urlopen(url=url, data=data, timeout=time_out)
            logger.info("fetched " + url.full_url + " successfully")
            return resp
        except Exception as e:
            logger.error(e)
            logger.error("request attempt " + str(i + 1) + " failed: " +
                         url.full_url)
            time.sleep(3)
    return ""
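# urlopen logs url.full_url, so it expects a urllib.request.Request object
# rather than a bare URL string. A usage sketch, assuming 'request' is
# urllib.request (as the function's own calls imply) and that
# reconnection_count and time_out are module-level settings; the URL and
# headers are made up for illustration.
req = request.Request('http://example.com/api',
                      headers={'User-Agent': 'Mozilla/5.0'})
resp = urlopen(req)
if resp != "":  # the function returns "" after all retries fail
    body = resp.read()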
def linux_ssh_login2(self):
    ssh = None
    try:
        logger.debug(self.ip + " Connecting...")
        # key-based login: pass1 is the username here
        ssh = pexpect.spawn('ssh %s@%s' % (self.pass1, self.ip))
        i = ssh.expect(['$', pexpect.TIMEOUT], timeout=8)
        if i >= 1:
            logger.error(self.ip + " Error logging in with ssh key!")
            ssh.close()
            return None
    except Exception as e:
        logger.error(self.ip + ' ' + str(e))
        if ssh is not None:
            ssh.close()
        return None
    else:
        logger.info(self.ip + " Logged in!")
        return ssh
def ftp_alert(msg, phone):
    # create a temp file named for the SMS gateway's pickup convention
    fpath = r'/var/www/html/netmon/log'
    os.chdir(fpath)
    timestr = time.strftime('%H%M%S', time.localtime())
    fname = '220000000_000000000000_' + timestr + '_NETMON.txt'
    text = ''
    # convert the Chinese message to the encoding used on the ftp server
    msg = msg.encode('gbk')
    for num in phone:
        text = text + '|' + num + '||' + msg + "|0|\n"
    try:
        with open(fname, 'w') as file:
            file.writelines(text)
    except Exception as e:
        logger.error(str(e))
        return False
    else:
        logger.debug('temp file [' + fname + '] created')
    # upload the temp file to the ftp server
    ftp = FTP()
    try:
        ftp.set_debuglevel(1)
        ftp.connect(ftp_server, ftp_port)
        ftp.login(ftp_usr, ftp_pwd)
        logger.debug(" Ftp Server Logged in!")
        ftp.cwd(ftp_dir)
        with open(fname, 'r') as file:
            ftp.storlines('STOR ' + fname, file)
        ftp.set_debuglevel(0)
        ftp.close()
    except Exception as e:
        logger.error(str(e))
        ftp.close()
        return False
    else:
        logger.info(" file uploaded to FTP server!")
    # clean up the temp file
    os.remove(fname)
    return True
def get_data(conn, date_str, soup4, data_type=5):
    table = soup4.find(id="placehereresult2")
    tds = table.find_all("td")
    for td in tds:
        text = td.get_text()
        if "平均" in text:  # the row labelled "average"
            val = td.span.get_text()
            logger.info("average: " + val + " " + date_str)
            # date_str looks like 2017/02/03; keep year-month only
            yearmonth = date_str[0:4] + "-" + date_str[5:7]
            # crude oil price
            insert(conn, 7, yearmonth, float(val))
            break
def get_weibo(mid):
    url = "http://10.75.57.27/getdata/querydata2.php?condition=%s&mode=weibo&format=json" % (
        mid.strip())
    start_time = clock()
    text = requests.get(url, timeout=4).text
    logger.info(str(text))
    logger.debug(url)
    end_time = clock()
    # request timing, previously appended to request.log by hand
    logger.debug('request %s cost: %s' % (url, end_time - start_time))
    testLogger()
    return text
def init_jinja2(app, **kw):
    logger.info('init jinja2...')
    options = dict(
        autoescape=kw.get('autoescape', True),
        block_start_string=kw.get('block_start_string', '{%'),
        block_end_string=kw.get('block_end_string', '%}'),
        variable_start_string=kw.get('variable_start_string', '{{'),
        variable_end_string=kw.get('variable_end_string', '}}'),
        auto_reload=kw.get('auto_reload', True))
    path = kw.get('path', None)
    if path is None:
        path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'templates')
    logger.info('set jinja2 template path: %s' % path)
    env = Environment(loader=FileSystemLoader(path), **options)
    filters = kw.get('filters', None)
    if filters is not None:
        for name, f in filters.items():
            env.filters[name] = f
    app['__templating__'] = env
def word2vec(model_path, corpus, embeding_size=256, min_count=1, window=7):
    path = get_tmpfile(model_path)
    logger.info("word2vec training started: %s" % time.ctime())
    model = Word2Vec(sentences=corpus,
                     size=embeding_size,
                     min_count=min_count,
                     window=window,
                     workers=4,
                     iter=10)
    logger.info("word2vec training finished: %s" % time.ctime())
    model.save(model_path)
    # note: the text dump below overwrites the binary save at model_path,
    # leaving one "word v1 v2 ..." line per vocabulary word
    with open(model_path, encoding="utf-8", mode="w") as file:
        for word, _ in model.wv.vocab.items():
            vector = [str(i) for i in model.wv[word]]
            file.write(word + " " + " ".join(vector) + "\n")
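# A small reader for the plain-text vectors that word2vec writes above. The
# "word v1 v2 ..." line layout is taken from the writer; the function name is
# an illustrative assumption.
def load_text_vectors(path):
    vectors = {}
    with open(path, encoding="utf-8", mode="r") as file:
        for line in file:
            parts = line.rstrip("\n").split(" ")
            vectors[parts[0]] = [float(x) for x in parts[1:]]
    return vectors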
async def response(request):
    # 'handler' and 'app' come from the enclosing middleware factory (not shown)
    logger.info('Response handler...')
    r = await handler(request)
    if isinstance(r, web.StreamResponse):
        return r
    if isinstance(r, bytes):
        resp = web.Response(body=r)
        resp.content_type = 'application/octet-stream'
        return resp
    if isinstance(r, str):
        if r.startswith('redirect:'):
            return web.HTTPFound(r[9:])
        resp = web.Response(body=r.encode('utf-8'))
        resp.content_type = 'text/html;charset=utf-8'
        return resp
    if isinstance(r, dict):
        template = r.get('__template__')
        if template is None:
            resp = web.Response(body=json.dumps(
                r, ensure_ascii=False,
                default=lambda o: o.__dict__).encode('utf-8'))
            resp.content_type = 'application/json;charset=utf-8'
            return resp
        else:
            r['__user__'] = request.__user__
            resp = web.Response(
                body=app['__templating__'].get_template(template).render(
                    **r).encode('utf-8'))
            resp.content_type = 'text/html;charset=utf-8'
            return resp
    # web.Response takes status/text as keyword-only arguments
    if isinstance(r, int) and r >= 100 and r < 600:
        return web.Response(status=r)
    if isinstance(r, tuple) and len(r) == 2:
        t, m = r
        if isinstance(t, int) and t >= 100 and t < 600:
            return web.Response(status=t, text=str(m))
    # default
    resp = web.Response(body=str(r).encode('utf-8'))
    resp.content_type = 'text/plain;charset=utf-8'
    return resp
def get_db_cursor(self):
    cursor = None
    try:
        if self.cnx:
            cursor = self.cnx.cursor()
    except MySQLdb.Error as err:
        logger.info(err)
    try:
        yield cursor
        # commit if the caller's block finished without raising
        if cursor:
            self.cnx.commit()
    except MySQLdb.Error as err:
        # log the exception and roll the transaction back
        self.cnx.rollback()
        logger.info(err)
        logger.info(cursor.cmd)
        logger.info('cursor has been rolled back')
    finally:
        if cursor:
            cursor.close()
            logger.info('cursor has been closed')
def prepare_pred_data(self, text):
    # [CLS] and [SEP] account for the +2 on top of the raw text length
    max_length = len(text) + 2
    tokens = list(text)
    tokens = ["[CLS]"] + tokens + ["[SEP]"]
    logger.info(tokens)
    input_ids = self.tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    segment_ids = [0] * len(input_ids)
    # pad every sequence out to max_length with zeros
    input_ids = input_ids + (max_length - len(input_ids)) * [0]
    segment_ids = segment_ids + (max_length - len(segment_ids)) * [0]
    input_mask = input_mask + (max_length - len(input_mask)) * [0]
    feed = {
        self.input_ids: [input_ids],
        self.segment_ids: [segment_ids],
        self.input_mask: [input_mask],
        self.dropout: 1.0
    }
    return feed