def calculate_average_bow_size(res_folder):
    """
    Compute and print the average bag-of-words size per result category
    ("right", "wrong", "swing") for the URLBow database.

    :param res_folder: folder containing classification result files
    :return: None (prints a list of (label, average, count) tuples)
    """
    size_totals = {"right": 0, "wrong": 0, "swing": 0}
    sample_counts = {"right": 0, "wrong": 0, "swing": 0}

    Logger.info("Average bow size, on right bow size")
    # Correctly classified samples all fall under the "right" bucket.
    for result in RightResultsIter(res_folder):
        size_totals["right"] += len(URLBow.objects.get(index=result.ref_id).bow)
        sample_counts["right"] += 1

    Logger.info("Average bow size, on wrong bow size")
    # Misclassified samples are split into "swing" and plain "wrong".
    for result in WrongResultsIter(res_folder):
        bucket = "swing" if result.is_swing_sample() else "wrong"
        sample_counts[bucket] += 1
        size_totals[bucket] += len(URLBow.objects.get(index=result.ref_id).bow)

    summary = []
    for bucket, total in size_totals.items():
        count = sample_counts[bucket]
        # Falls back to 1 (not 0) for an empty bucket, mirroring the original report.
        average = total / count if count != 0 else 1
        summary.append((bucket, average, count))
    print(summary)
class GetAuth:
    """Fetch and persist the backend (B-side) login token for an environment."""

    def __init__(self, env='QA'):
        self.log = Logger("debug")
        self.opera = OperationIni(fileName='config.ini', pathName='config')
        login_path = '/website/saas/account/api2/user/login'
        # ini key under which the token is stored, e.g. 'qa_token' / 'dev_token'
        self.key = env.lower() + '_token'
        # (base_url, cookie, username, password) resolved per environment
        auth_info = get_env_authorization(env=env)
        self.url = auth_info[0] + login_path
        self.cookie = auth_info[1]
        self.userName = auth_info[2]
        self.passWord = auth_info[3]
        self.headers = {
            'Cookie': self.cookie,
            'Content-Type': 'application/x-www-form-urlencoded'
        }

    def get_auth(self):
        '''
        Request a fresh login token from the backend login endpoint.

        :return: (token, full JSON response) on success; None on failure
            (the error is logged and printed).
        '''
        data = {
            'zone': '0086',
            'phone': self.userName,
            'password': self.passWord,
            'remember': False,
            'passwordType': 'new'
        }
        self.log.info('开始:调用获取B端后台token接口,请求地址为:{0},入参为:{1},请求头为:{2}'.format(
            self.url, data, self.headers))
        response = requests.post(url=self.url, data=data, headers=self.headers)
        try:
            token = response.json()['data']['token']
            self.log.info('结束:调用获取B端后台token接口,获取到token为:{0}'.format(token))
            return token, response.json()
        except Exception as f:
            self.log.error('获取B端后台token失败,错误日志为:{0}'.format(f))
            print(f)

    def set_auth(self):
        '''
        Store the newest backend Authorization token into the ini file.
        '''
        token = self.get_auth()[0]
        self.opera.write_ini(section='Authorization', data=token, key=self.key)
def scrape_links_from_position(self, pos):
    """Scrape every queued URL from position *pos* onward, including children.

    :param pos: queue position to resume from
    :return: 0 on completion (process-style exit status)
    """
    MongoDB.connect(settings.HOST_NAME, settings.PORT)
    pending = self.__get_next_urls(pos)
    Logger.info(pending)
    for url in pending:
        self.scrape_link_and_child(url)
    Logger.debug('Process job completed')
    return 0
def scrape_link_and_child(self,parent_url):
    """Scrape *parent_url*, persist it, then scrape and persist a random
    subset of the pages it links to.

    :param parent_url: dot-escaped URL of the page to scrape
    """
    parent_url=base_util.replace_dot_url(parent_url)
    # scrape() here returns (body, final_url) — the URL may be rewritten.
    webpage_body,parent_url=self.scrape(base_util.unreplace_dot_url(parent_url),None)
    #exit if failed to scrap website
    if webpage_body is None:
        return
    Logger.debug('Saving Parent')
    MongoDB.save_page(url=parent_url,page=webpage_body)
    Logger.info('Completed page: '+parent_url)
    #Now, we grab the childs of this webpage
    all_ahref=[base_util.combine_parent_rel_link(parent_url,a.attrs['href']) for a in BeautifulSoup(webpage_body,'html.parser', from_encoding="utf-8").find_all('a') if 'href' in a.attrs]
    # Sample at most GET_X_CHILD child links; keep all if fewer exist.
    child_urls=random.sample(all_ahref,settings.GET_X_CHILD) if len(all_ahref)>=settings.GET_X_CHILD else all_ahref
    #get rid of bad normalization
    if not re.match('^www[.].*$',parent_url):
        Logger.info('Updating bad url for {}'.format(parent_url))
        MongoDB.update_url(base_util.normalize_url(parent_url),parent_url)
    if len(child_urls) > 0:
        #get the childs, child urls is a subset of all urls
        for child_url in child_urls:
            Logger.debug('Get Child {}'.format(child_url))
            child_page=self.scrape(child_url,parent_url)
            if child_page is None:
                # Fallback: try other links until one scrapes or MAX_RETRIES hit.
                exploredset=set()
                tries=0
                # NOTE(review): `^` is symmetric difference; since exploredset
                # starts empty this equals all_ahref — a plain difference (`-`)
                # recomputed per iteration was probably intended. Confirm.
                for url in set(all_ahref)^(exploredset):
                    if tries==settings.MAX_RETRIES:
                        Logger.info('Max retrie number exceeded')
                        break
                    Logger.info("trying new url: "+url)
                    child_page=self.scrape(url,parent_url)
                    if child_page is not None:
                        break
                    exploredset.add(url)
                    tries+=1
            if child_page is not None:
                # NOTE(review): when the fallback loop produced the page, it is
                # still saved under the original child_url — verify intended.
                Logger.debug('Saving Child {}'.format(child_url))
                MongoDB.save_modify_url(url=base_util.replace_dot_url(child_url),parent=[MongoDB.get_url_object(parent_url)],genre=[],page=child_page)
                Logger.info('Completed page: '+child_url)
def __onConsume(self):
    """Create the MQTT client, subscribe to the configured topic, and block
    on the network loop. Any failure is logged rather than raised.
    """
    try:
        Logger.info("Initializing MQTT Response ...")
        self.__client = mqtt.Client()
        self.__client.on_connect = self.__onConnect
        self.__client.on_message = self.__onMessage
        # Connection parameters come from the injected properties object.
        host = self.__properties.get('address.broker')
        port_number = int(self.__properties.get('port.broker'))
        keep_alive = int(self.__properties.get('keep.alive.broker'))
        topic = self.__properties.get('topic.subscribe.broker')
        self.__client.connect(host, port_number, keep_alive)
        self.__client.subscribe(topic)
        # Blocks forever, dispatching the callbacks registered above.
        self.__client.loop_forever()
    except Exception as exception:
        Logger.error("MQTT Response failed. Cause: " + str(exception))
def calculate_genres_per_instance(res_folder,classifiers=""):
    """Tally, per result category, how many short genres each sample has;
    write the wrong/swing samples to text files and print the distributions.

    :param res_folder: folder holding classification result files
    :param classifiers: optional classifier-name filter, also used as a
        prefix for the output file names
    """
    current_classifier=classifiers
    right_genresize_counter=collections.Counter()
    wrong_genresize_counter=collections.Counter()
    swing_genresize_counter=collections.Counter()
    Logger.info("Current on rights")
    #iterate over the right samples first, we don't write to file because right files are the same
    # The ref_id-keyed dict comprehension dedupes samples before iterating.
    for right_res_obj in {x.ref_id: x for x in RightResultsIter(res_folder,classifiers)}.values():
        assert isinstance(right_res_obj,ClassificationResultInstance)
        if right_res_obj.classifier != current_classifier:
            current_classifier=right_res_obj.classifier
        #now find the size of its genre
        right_genresize_counter.update([len(URLBow.objects.get(index=right_res_obj.ref_id).short_genres)])
    Logger.info("Current on wrongs")
    # Output paths get a "<classifiers>_" prefix only when one was supplied.
    swing_file=res_folder+"/{}swing.txt".format(classifiers+"_" if classifiers.strip()!="" else classifiers)
    wrong_file=res_folder+"/{}wrong_true.txt".format(classifiers+"_" if classifiers.strip()!="" else classifiers)
    with open(swing_file,mode="w") as swing_handle,open(wrong_file,mode="w") as wrong_handle:
        #iterate over the wrong samples
        for wrong_res_obj in {x.ref_id: x for x in WrongResultsIter(res_folder,classifiers)}.values():
            assert isinstance(wrong_res_obj,ClassificationResultInstance)
            if wrong_res_obj.classifier != current_classifier:
                current_classifier=wrong_res_obj.classifier
            if wrong_res_obj.is_swing_sample():
                swing_handle.write(str(wrong_res_obj)+"\n")
                swing_genresize_counter.update([len(URLBow.objects.get(index=wrong_res_obj.ref_id).short_genres)])
            else:
                wrong_handle.write(str(wrong_res_obj)+"\n")
                #now find the size of its genre
                wrong_genresize_counter.update([len(URLBow.objects.get(index=wrong_res_obj.ref_id).short_genres)])
    print("Wrong predicted sample distrbution: {}".format(sorted(wrong_genresize_counter.items(),key=operator.itemgetter(0))))
    print("Right predicted sample distrbution: {}".format(sorted(right_genresize_counter.items(),key=operator.itemgetter(0))))
    print("Swing sample distrbution: {}".format(sorted(swing_genresize_counter.items(),key=operator.itemgetter(0))))
def get(self, url):
    """GET *url* with a randomized politeness delay and connectivity recovery.

    After REQUEST_EXCEPTION_UNTIL_TEST_CONNECTION consecutive failures,
    blocks until the internet is reachable again, then retries once.

    :param url: URL to fetch
    :return: the HTTP response, or None if the request failed and the
        failure threshold has not yet been reached
    """
    self._randomized_wait()
    response = None
    try:
        response = self.http.request('GET', url, timeout=settings.TIME_OUT)
        self.bad_count = 0
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not
    # swallowed; any request-level error still counts as a failure.
    except Exception:
        self.bad_count += 1
        # wait and sleep until we get an answer
        if self.bad_count >= settings.REQUEST_EXCEPTION_UNTIL_TEST_CONNECTION:
            while not self.testInternet():
                Logger.info('Waiting for internet')
                time.sleep(2)
            response = self.http.request('GET', url, timeout=settings.TIME_OUT)
            self.bad_count = 0
    return response
def scrape(self):
    """Scrape random DMOZ pages reachable from the home page.

    Keeps drawing random homepage links until NUM_RANDOM_WEBPAGE pages that
    are not already stored in MongoDB have been fetched and saved.
    """
    home = self.http.get(dmoz_home)
    home_page_links = self._scrapeHomeAndGetLinks(home.data)

    # Visit random homepage links until enough new pages have been saved.
    saved = 0
    while saved < settings.NUM_RANDOM_WEBPAGE:
        pick = home_page_links[random.randint(0, len(home_page_links) - 1)]
        result = self._scrapPage(pick)
        # Skip failures and pages already present in the database.
        if result is None or MongoDB.get_url_object(result['url']) is not None:
            continue
        saved += 1
        try:
            page = utf_8_safe_decode(self.http.get(result['url']).data)
            MongoDB.save_modify_url(page=page, **result)
            Logger.info("Completed: " + result['url'])
        except Exception as ex:
            Logger.error(ex)
def calculate_similarity():
    """Accumulate document-level similarity metrics into the Analytics record.

    Resumes from the persisted queue position and processes each distinct
    URL's genre metadata exactly once, folding the results into the running
    totals stored under ANALYTICS_NAME.
    """
    queue = DBQueue("similarity_queue")
    # Resume from where the previous run left off.
    genre_docs = GenreMetaData.objects.order_by("url")[queue.get():]

    # Ensure the analytics accumulator document exists.
    analytics = col.Analytics()
    if analytics.select(name=ANALYTICS_NAME).find_one() is None:
        analytics.create(
            alexa_total=0,
            edit_distance_count=0,
            total_edit_distance=0,
            alexa_match=0,
            name=ANALYTICS_NAME,
            alexa_genre_length=0,
        )

    seen_urls = set()
    # Similarity is calculated on a document-to-document basis.
    for doc in genre_docs:
        url = doc["url"]
        if url not in seen_urls:
            seen_urls.add(url)
            Logger.info("Doing genre for url: {}".format(url))
            delta = _calculate_similarity_document(doc)
            # Fold the per-document result into the stored running totals.
            stored = analytics.select(name=ANALYTICS_NAME).find_one()
            for field in delta.keys():
                delta[field] += stored[field]
            analytics.select(name=ANALYTICS_NAME).update(**delta)
        queue.increment()

    print("URL has a unique percent of {}".format(len(seen_urls) / len(genre_docs) * 100))
def main():
    """Entry point: parse options, set up logging, create the ROS retriever.

    Exits with status 1 if the retriever cannot be constructed.
    """
    (partition, logLevel, mode) = getOptions(sys.argv[1:])
    logger = Logger(name="a2d2", logFile=conf.APPLICATION_LOG_FILE, level=logLevel)
    logger.info("\n\n\n" + 78 * '=')
    logger.info("Options: IS partition: '%s' mode: '%s'" % (partition, mode))
    # create ROS retriever
    m = "Could not create an instance of retriever, reason: %s"
    try:
        if mode in ("prod", "production"):
            from ros.RosIsRetriever import RosIsRetriever
            rosRetriever = RosIsRetriever(partition, logger)
        elif mode in ("devel", "development"):
            rosRetriever = RosDummyRetriever(logger)
        else:
            raise RosRetrieverException("Unsupported mode: '%s'" % mode)
    # BUGFIX: was Python 2 `except RosRetrieverException, ex:` — a SyntaxError
    # on Python 3. `as` form is valid on Python 2.6+ and 3.x.
    except RosRetrieverException as ex:
        logger.fatal(m % ex)
        logger.close()
        sys.exit(1)
class ThreadChecker(threading.Thread):
    """Periodic worker thread.

    Runs the supplied action roughly every 3 seconds until stopped. The
    thread is completely independent of what the action actually does;
    execution can also be temporarily bypassed without stopping the loop.
    """

    def __init__(self, logLevel, action):
        self.actionToRun = action
        self.logger = Logger(name="a2d2 thread",
                             logFile=conf.APPLICATION_LOG_FILE,
                             level=logLevel)
        threading.Thread.__init__(self)
        self.__stopFlag = False
        # When True, the periodic check skips the action but keeps looping.
        self.__bypass = False
        self.logger.info("Initialised.")

    def run(self):
        # The stop flag is checked both before and after the sleep so that
        # shutdown requests are honoured promptly.
        while not self.__stopFlag:
            time.sleep(3)
            if self.__stopFlag:
                break
            self.logger.debug("Running ...")
            if self.__bypass:
                self.logger.debug("Dummy loop, actual thread action bypassed.")
            else:
                self.actionToRun(self.logger)
            self.logger.info("Loop finished, sleeping ...")
        self.logger.info("Completely finished.")

    def setStop(self):
        """Request the run loop to terminate at its next flag check."""
        self.logger.debug("Setting stop flag to True ...")
        self.__stopFlag = True
        self.logger.debug("Stop flag set to True.")

    def bypassExecution(self):
        """Skip the action on subsequent iterations (loop keeps running)."""
        self.__bypass = True

    def resumeExecution(self):
        """Resume running the action on subsequent iterations."""
        self.__bypass = False
def __onMessage(self, client, userdata, message):
    """MQTT message callback: parse the JSON payload into a Response object
    and hand it to the response service for routing.
    """
    Logger.info('Received response. Client : ' + str(client))
    Logger.info('Received response. User data: ' + str(userdata))
    Logger.info('Received response. Message : ' + str(message))

    payload = json.loads(message.payload)

    # String fields are stripped of surrounding whitespace; ports are ints.
    response = Response()
    response.setId(payload['id'])
    response.setReplyHost(str(payload['replyHost']).strip())
    response.setReplyPort(int(payload['replyPort']))
    response.setReplyChannel(str(payload['replyChannel']).strip())
    response.setVersionProtocol(str(payload['versionProtocol']).strip())
    response.setStatusCode(payload['statusCode'])
    response.setStatusMessage(str(payload['statusMessage']).strip())
    response.setHeader(str(payload['header']).strip())
    response.setBody(str(payload['body']).strip())

    self.__responseService.route(response)
    # Tail of a k-fold index generator whose `def` lies above this chunk:
    # yields (train_indexes, test_indexes) per fold; the last fold absorbs
    # any remainder of the shuffled indexes.
    for fold in range(0,k_folds):
        test_end_fold=(fold+1)*num_per_fold if fold != k_folds-1 else len(shuffled_indexes)
        test_start_fold=fold*num_per_fold
        test_ind=shuffled_indexes[test_start_fold:test_end_fold]
        train_ind=shuffled_indexes[:test_start_fold]+shuffled_indexes[test_end_fold:]
        yield train_ind,test_ind

if __name__=="__main__":
    #See classification.classification_settings for the adjustable settings
    supervised_logger.info("Number of Weights: {}".format(weights.num_classifiers))
    #CLASSIFICATION, adjust weights
    classifier_util=ClassifierUtil()
    """
    LOAD DATA, preprocess
    """
    #WARNING: REF INDEX for each individual X set must match row to row
    Xs=[]
    ys=[]
    ref_indexes_unmatched=[]
    ref_indexes=[]
    # (loop body continues beyond this chunk)
    for setting in settings:
class updateGoodsShelfStatus:
    """Batch-toggle the on/off-shelf status of goods via the open API,
    transparently refreshing an expired access_token once."""

    def __init__(self, pid, env='QA'):
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.get_skuId = GetGoodsDetail(env=env, pid=pid)
        self.get_access_token = GetAccessToken(env=env, pid=pid)
        # Lowercase the env string, e.g. 'QA' -> 'qa'.
        env = env.lower()
        key = env + '_url'
        self.base_url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='updateGoodsShelfStatus')
        self.access_token = self.get_access_token.get_ini_access_token()

    def update_goods_shelf_status(self, goodsIdList, isPutAway, storeId=None):
        '''
        Batch update the on/off-shelf status of goods.

        :param goodsIdList: comma-separated goods ids (max 50)
        :param isPutAway: 0 = put on shelf, 1 = take off shelf
        :param storeId: store id
        :return: decoded JSON response
        '''
        url = self.base_url.format(self.path, self.access_token)
        # Split the comma-separated ids and force every element to int.
        goodsIdLists = list(map(int, goodsIdList.split(',')))
        json_data = {
            'storeId': storeId,
            'goodsIdList': goodsIdLists,
            'isPutAway': isPutAway
        }
        self.log.info(
            '开始:调用update_goods_shelf_status方法,请求地址为:{0},入参为:{1}'.format(
                url, json_data))
        requests.packages.urllib3.disable_warnings()
        r = requests.post(url=url, json=json_data, verify=False)
        # Token expired: refresh once and retry the same request.
        if r.json()['data'] == 'invalid accesstoken':
            self.log.warning('提示:ini文件中的accesstoken失效,开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # Important: re-read the ini to pick up the freshly stored token.
            new_access_token = self.get_access_token.get_ini_access_token()
            url = self.base_url.format(self.path, new_access_token)
            self.log.warning(
                '开始:调用update_goods_shelf_status方法,请求地址为:{0},入参为:{1}'.format(
                    url, json_data))
            requests.packages.urllib3.disable_warnings()
            res = requests.post(url=url, json=json_data, verify=False)
            self.log.warning(
                '结束:调用update_goods_shelf_status方法,返回数据为:{0}'.format(
                    res.json()))
            return res.json()
        else:
            self.log.info('结束:调用update_goods_shelf_status方法,返回数据为:{0}'.format(
                r.json()))
            return r.json()
class ClassifierPack(BaseClass, _Reformat_Ys, _clf_metric):
    """Bundle of many classifier wrappers driven through one uniform API.

    Holds one instance per selected entry of ``class_pack`` and fans out
    fit/predict/score/param-search calls to every member, collecting the
    results keyed by classifier name.
    """

    # Registry of every available classifier wrapper class, keyed by name.
    class_pack = {
        "skMLP": skMLP,
        "skSGD": skSGD,
        "skGaussian_NB": skGaussian_NB,
        "skBernoulli_NB": skBernoulli_NB,
        "skMultinomial_NB": skMultinomial_NB,
        "skDecisionTree": skDecisionTree,
        "skRandomForest": skRandomForest,
        "skExtraTrees": skExtraTrees,
        "skAdaBoost": skAdaBoost,
        "skGradientBoosting": skGradientBoosting,
        "skQDA": skQDA,
        "skKNeighbors": skKNeighbors,
        "skLinear_SVC": skLinear_SVC,
        "skRBF_SVM": skRBF_SVM,
        "skGaussianProcess": skGaussianProcess,
        "skBagging": skBagging,
        "XGBoost": XGBoostClf,
        "LightGBM": LightGBMClf,
        "CatBoost": CatBoostClf,
        'mlxAdaline': mlxAdalineClf,
        'mlxLogisticRegression': mlxLogisticRegressionClf,
        'mlxMLP': mlxMLPClf,
        'mlxPerceptronClf': mlxPerceptronClf,
        'mlxSoftmaxRegressionClf': mlxSoftmaxRegressionClf,
    }

    def __init__(self, pack_keys=None):
        # Instantiate the selected classifiers (all of them by default).
        super().__init__()
        self.log = Logger(self.__class__.__name__)
        if pack_keys is None:
            pack_keys = self.class_pack.keys()

        self.pack = {}
        for key in pack_keys:
            self.pack[key] = self.class_pack[key]()
        self.optimize_result = {}
        self.params_save_path = SKLEARN_PARAMS_SAVE_PATH

    def param_search(self, Xs, Ys):
        """Grid-search each classifier over its own tuning grid; keep the
        best estimator and the full result table per classifier."""
        Ys = self._reformat_to_index(Ys)
        for key in self.pack:
            cls = self.class_pack[key]
            obj = cls()
            optimizer = ParamOptimizer(obj, obj.tuning_grid)
            self.pack[key] = optimizer.optimize(Xs, Ys)
            self.optimize_result[key] = optimizer.result
            optimizer.result_to_csv()

            self.log.info("top 5 result")
            for result in optimizer.top_k_result():
                self.log.info(pformat(result))

    def _collect_predict(self, Xs):
        # Predictions per classifier; individual failures are logged and
        # that classifier is simply omitted from the result dict.
        result = {}
        for key in self.pack:
            try:
                result[key] = self.pack[key].predict(Xs)
            except BaseException as e:
                self.log.warn(f'while fitting, {key} raise {e}')
        return result

    def predict(self, Xs):
        """Return {classifier_name: predictions} for every member."""
        return self._collect_predict(Xs)

    def fit(self, Xs, Ys):
        """Fit every member classifier; failures are logged, not raised."""
        Ys = self._reformat_to_index(Ys)
        for key in self.pack:
            try:
                self.pack[key].fit(Xs, Ys)
            except BaseException as e:
                self.log.warn(f'while fitting, {key} raise {e}')

    def score(self, Xs, Ys, metric='accuracy'):
        """Return {classifier_name: score} for a single metric."""
        Ys = self._reformat_to_index(Ys)
        scores = {}
        for clf_k, predict in self._collect_predict(Xs).items():
            scores[clf_k] = self._apply_metric(Ys, predict, metric)
        return scores

    def score_pack(self, Xs, Ys):
        """Return {classifier_name: full metric pack}."""
        Ys = self._reformat_to_index(Ys)
        ret = {}
        for clf_k, predict in self._collect_predict(Xs).items():
            ret[clf_k] = self._apply_metric_pack(Ys, predict)
        return ret

    def predict_proba(self, Xs):
        """Return {classifier_name: class probabilities}; failures logged."""
        result = {}
        for key in self.pack:
            try:
                result[key] = self.pack[key].predict_proba(Xs)
            except BaseException as e:
                self.log.warn(f'while predict_proba, {key} raise {e}')
        return result

    def import_params(self, params_pack):
        # Rebuild each classifier from a previously exported parameter dict.
        for key in self.pack:
            class_ = self.class_pack[key]
            self.pack[key] = class_(**params_pack[key])

    def export_params(self):
        # Collect get_params() output per classifier, keyed by name.
        params = {}
        for key in self.pack:
            clf = self.pack[key]
            params[key] = clf.get_params()
        return params

    def save_params(self, path=None):
        """Pickle the exported params; default path is timestamped under
        ``params_save_path``. Returns the pickle file path."""
        if path is None:
            path = os.path.join(self.params_save_path, time_stamp())

        params = self.export_params()
        pickle_path = path + '.pkl'
        dump_pickle(params, pickle_path)

        self.log.info('save params at {}'.format([pickle_path]))

        return pickle_path

    def load_params(self, path):
        """Load a params pickle and rebuild every classifier from it."""
        self.log.info('load params from {}'.format(path))
        params = load_pickle(path)
        self.import_params(params)

    def make_FoldingHardVote(self):
        """Wrap all members in a hard-voting ensemble."""
        clfs = [v for k, v in self.pack.items()]
        return FoldingHardVote(clfs)

    def make_stackingClf(self, meta_clf):
        """Build a stacking ensemble from members that expose get_params."""
        clfs = [
            clf for k, clf in self.pack.items() if hasattr(clf, 'get_params')
        ]
        return mlxStackingClf(clfs, meta_clf)

    def make_stackingCVClf(self, meta_clf):
        """Build a CV-stacking ensemble from members that expose get_params."""
        clfs = [
            clf for k, clf in self.pack.items() if hasattr(clf, 'get_params')
        ]
        return mlxStackingCVClf(clfs, meta_clf)

    def clone_top_k_tuned(self, k=5):
        """Augment the pack with clones built from tuning runner-up params.

        NOTE(review): slices ``[1:k]`` — i.e. k-1 runner-up configs per
        classifier; confirm index 0 (the best) is deliberately skipped
        because the optimized instance already lives in ``self.pack``.
        """
        new_pack = {}
        for key in self.pack:
            new_pack[key] = self.pack[key]
            results = self.optimize_result[key][1:k]
            for i, result in enumerate(results):
                param = result["param"]
                cls = self.pack[key].__class__
                new_key = str(cls.__name__) + '_' + str(i + 1)
                clf = cls(**param)
                new_pack[new_key] = clf

        self.pack = new_pack
        return self.pack

    def drop_clf(self, key):
        """Remove a classifier from the pack."""
        self.pack.pop(key)

    def add_clf(self, key, clf):
        """Add a classifier under a unique key."""
        if key in self.pack:
            raise KeyError(f"key '{key}' is not unique")

        self.pack[key] = clf

    def clone_clf(self, key, n=1, param=None):
        # NOTE(review): method body appears truncated in this view — only
        # the key-existence guard is visible.
        if key not in self.pack:
            raise KeyError(f"key '{key}' not exist")
class MockOrderThrow:
    """Drive the mock order-throw SOA service (pickingAndDelivery), refreshing
    the cached service IP and retrying once when the call fails."""

    def __init__(self, pid, env):
        self.log = Logger("debug")
        self.pid = pid
        self.env = env
        self.opera = OperationIni()
        self.get_order_detail = GetOrderDetail(pid=pid, env=env)

    def mock_order_throw(self, orderNo):
        """Invoke the order-throw service for *orderNo*; returns the decoded
        JSON response, or an error dict if the retry also fails."""
        url = self.opera.read_ini(self.env, key='mock_order_throw_ip')
        mock_url = 'http://' + url + ':8080/service'
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        # Pull (pickingPackageList, storeId, wid) from the order detail.
        order_detail = self.get_order_detail.get_order_item_id_skuNum(
            orderNo=orderNo)
        pickingPackageList = order_detail[0]
        storeId = order_detail[1]
        wid = order_detail[2]
        # Assemble the paramterInput payload.
        paramterInput = [{
            "markNo": "111",
            "orderNo": orderNo,
            "pickingPackageList": pickingPackageList,
            "pid": self.pid,
            "storeId": storeId,
            "wid": wid
        }]
        # Assemble the service envelope.
        data = {
            'serviceName': 'orderCenterUpdateExportService',
            'methodName': 'pickingAndDelivery',
            'paramterInput': '{0}'.format(paramterInput)
        }
        # Connect the VPN before touching the internal service.
        start_vpn()
        code = None
        try:
            self.log.info("开始:调用订单抛出服务接口,请求地址为:{0},入参为:{1},请求头为:{2}".format(
                mock_url, data, headers))
            r = requests.post(url=mock_url, data=data, headers=headers, timeout=3)
            code = r.status_code
            result = r.json()
            print('我要看:{0}'.format(result))
            self.log.info("结束:调用订单抛出服务接口,返回数据打印:{0}".format(result))
            # Disconnect the VPN before returning.
            stop_vpn()
            return result
        except Exception as f:
            print(f)
            status = False
            # The cached IP is presumed stale: fetch a fresh one and retry.
            if status == False or code != 200:
                self.log.warning('IP已失效,重新获取IP')
                url = GetSoaServerIp(
                    env=self.env,
                    serviceName='mock_order_throw_servicename').get_soa_url()
                self.log.warning("获取的新IP为:{0}".format(url))
                self.opera.write_ini(section=self.env, data=url,
                                     key='mock_order_throw_ip')
                mock_url = 'http://' + url + ':8080/service'
                self.log.warning("请求url为:{0},请求data为:{1},请求头为:{2}".format(
                    mock_url, data, headers))
                try:
                    self.log.warning(
                        "开始:调用订单抛出服务接口,请求地址为:{0},入参为:{1},请求头为:{2}".format(
                            mock_url, data, headers))
                    r = requests.post(url=mock_url, data=data, headers=headers)
                    result = r.json()
                    self.log.warning("结束:调用订单抛出服务接口,返回数据打印:{0}".format(result))
                    # Disconnect the VPN before returning.
                    stop_vpn()
                    return result
                except Exception as f:
                    msg = {'msg': '发生未知错误,请联系管理员,错误日志为:{0}'.format(f)}
                    self.log.error('发生未知错误,请联系管理员,错误日志为:{0}'.format(f))
                    # Disconnect the VPN before returning.
                    stop_vpn()
                    return msg


# g = MockOrderThrow(pid=1,env='QA')
# g.mock_order_throw(orderNo='10094010113')
class updateGoodsStock:
    """Update the stock count of a goods sku via the open API, transparently
    refreshing an expired access_token once."""

    def __init__(self, env='QA'):
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.get_skuId = GetGoodsDetail(env=env)
        self.get_access_token = GetAccessToken(env=env)
        # Lowercase the env string, e.g. 'QA' -> 'qa'.
        env = env.lower()
        key = env + '_url'
        self.base_url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='wholeUpdateStock')
        self.access_token = self.get_access_token.get_ini_access_token()

    def update_goods_stock(self, goodsId, editStockNum, storeId=None):
        '''
        Update the stock of a goods item.

        :param goodsId: goods id
        :param editStockNum: stock value to set
        :param storeId: store id
        :return: decoded JSON response, or an error dict on failure
        '''
        url = self.base_url.format(self.path, self.access_token)
        # Resolve the skuId for this goods item first.
        try:
            self.skuId = self.get_skuId.get_goods_detail(goodsId, storeId)[1]
            json_data = {
                'goodsId': goodsId,
                'storeId': storeId,
                'skuList': [{
                    'skuId': self.skuId,
                    'editStockNum': editStockNum
                }]
            }
            self.log.info('开始:调用update_goods_stock方法,请求地址为:{0},入参为:{1}'.format(
                url, json_data))
            r = requests.post(url=url, json=json_data)
            # Token expired: refresh once and retry the same request.
            if r.json()['data'] == 'invalid accesstoken':
                self.log.warning('提示:ini文件中的accesstoken失效,开始获取新的accesstoken')
                self.get_access_token.set_access_token()
                # Important: re-read the ini to pick up the fresh token.
                new_access_token = self.get_access_token.get_ini_access_token()
                url = self.base_url.format(self.path, new_access_token)
                self.log.warning(
                    '开始:调用update_goods_stock方法,请求地址为:{0},入参为:{1}'.format(
                        url, json_data))
                res = requests.post(url=url, json=json_data)
                self.log.warning('结束:调用update_goods_stock方法,返回数据为:{0}'.format(
                    res.json()))
                return res.json()
            else:
                self.log.info('结束:调用update_goods_stock方法,返回数据为:{0}'.format(
                    r.json()))
                return r.json()
        except Exception as f:
            self.log.error('调用获取商品详情接口失败,错误日志为:{0}'.format(f))
            return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
# --- Clustering experiment configuration (script top level) ---
clustering_alg=KMeans
settings.num_clusters=list({16})
settings.num_top_words=20 #LDA only
settings.max_cluster_size=10000 #the cluster will be further broken up if it is greater than this size
settings.break_up_clusters=True
settings.spectre_clustering_limit=15000 # if the cluster is less than 15K in size, use spectre clustering instead

#LOAD DATA
#generate_random_sample(unpickle_obj(X_pickle_path),unpickle_obj(y_pickle_path),unpickle_obj(ref_index_pickle_path),50000)
train_set=Training(settings,pickle_dir=PICKLE_DIR)
train_set.load_training()

#FEATURE SELECTION: keep only the top-k chi2-ranked features
best_k_attr=10000
feature_selector=Pipeline([("chi2",SelectKBest(chi2,best_k_attr))])
clustering_logger.info("Choosing best {} features".format(best_k_attr))
clustering_logger.debug("Normalizing to LV1")

#NORMALIZING THE Y: collapse every genre label to its level-1 ancestor
train_set.y=np.array([[normalize_genre_string(g,1) for g in r] for r in (row for row in train_set.y)])

clusterer=Clustering()
clusterer.feature_selection(train_set,feature_selector,fit=True)

# Run LDA topic modelling with the first configured cluster count.
lda_alg=LDA(n_topics=settings.num_clusters[0],n_iter=500, random_state=1)
lda(lda_alg,train_set,settings.num_top_words)
#unsupervised(train_set=train_set, settings=settings,clusterer=clusterer, clustering_alg_cls=clustering_alg)
class GetAccessToken:
    """Fetch, persist, and read back the OAuth access_token for an environment."""

    def __init__(self, env='QA'):
        self.log = Logger("debug")
        self.opera = OperationIni(fileName='config.ini', pathName='config')
        # Resolve the client credentials once (was called twice before).
        client_info = translate_env_access_token(env=env)
        self.clientId = client_info[0]
        self.clientSecret = client_info[1]
        self.env = env
        # Lowercase env string, e.g. 'qa' / 'dev'.
        self.l_env = env.lower()
        self.key = self.l_env + '_url'
        # The ini stores a URL template; fill in the credentials.
        base_url = self.opera.read_ini(section='access_token', key=self.key)
        self.url = base_url.format(self.clientId, self.clientSecret)

    def get_access_token(self):
        '''
        Request a fresh access_token using clientId/clientSecret.

        :return: the new access_token string
        '''
        self.log.info('开始:调用access_token接口,请求地址为:{0}'.format(self.url))
        r = requests.post(url=self.url)
        access_token = r.json()['access_token']
        self.log.info(
            '结束:调用access_token接口,获取的access_token为:{0}'.format(access_token))
        return access_token

    def set_access_token(self):
        '''
        Persist the newest access_token into the ini file, overwriting the
        previous value.
        '''
        key = self.l_env + '_access_token'
        access_token = self.get_access_token()
        # Store the freshly fetched access_token in the ini file.
        self.opera.write_ini(section='access_token', data=access_token, key=key)
        # BUGFIX: the format arguments were swapped — {0} is the ini key,
        # {1} is the token value.
        self.log.info(
            '成功写入最新的access_token到ini文件中,子节点为:access_token,key为:{0},写入的access_token为:{1}'
            .format(key, access_token))

    def get_ini_access_token(self):
        '''
        Read the access_token currently stored in the ini file.

        :return: the stored access_token string
        '''
        access_token = get_env_access_token(env=self.env)
        self.log.info('获取到ini文件中已有的access_token为:{0}'.format(access_token))
        return access_token
    # Tail of a data-generation function defined above this chunk: commit the
    # last open lmdb landmark-label transaction.
    txn_landmark_label.commit()

if __name__ == "__main__":
    USE_CUDA = True
    GPU_ID = [0]
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in GPU_ID])
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and USE_CUDA else "cpu")

    # pnet: first-stage detector, always loaded.
    pnet_weight_path = "../models/pnet_20201009_final.pkl"
    pnet = PNet(test=True)
    LoadWeights(pnet_weight_path, pnet)
    pnet.to(device)

    # rnet: second-stage detector.
    # NOTE(review): rnet is only loaded when net_type == "ONET" — confirm this
    # is intended (i.e. RNET is needed in the cascade to generate ONET data).
    rnet = None
    if net_type == "ONET":
        rnet_weight_path = "../models/rnet_20201012_iter_220000.pkl"
        rnet = RNet(test=True)
        LoadWeights(rnet_weight_path, rnet)
        rnet.to(device)

    mt = MTCNN(detectors=[pnet, rnet, None],
               min_face_size=24,
               threshold=[0.5, 0.5, 0.5],
               device=device)
    GenerateData(mt)
    log.info("over...")
def __init__(self, type_queue, position=0):
    """Bind to the persisted queue of *type_queue*, creating it if absent.

    :param type_queue: queue type identifier used as the lookup key
    :param position: starting position for a newly created queue
    """
    self.queue = MetaData(type=type_queue)
    # Idiom fix: identity test with `is None` instead of `== None`.
    if self.queue.find_one() is None:
        Logger.info('Queue of Type: {} does not exist in database, creating'.format(type_queue))
        self.queue.create(type=type_queue, position=position).save()
# Open write transactions on the negative-sample image/label lmdb stores.
txn_neg_image = env_neg_image.begin(write=True)
txn_neg_label = env_neg_label.begin(write=True)

# 1. Read the ground-truth labels (from the train set).
anno_file = os.path.join(anno_dir, "wider_face_train_bbx_gt.txt")
with open(anno_file, "r") as f:
    inner_neg_idx = 0
    inner_pos_idx = 0
    inner_part_idx = 0
    # Annotation format: a filename line, a face-count line, then one bbox
    # line per face.
    while True:
        filename = f.readline()
        if not filename:
            break
        filename = filename.strip('\n')
        log.info("now process -> {}".format(filename))
        face_num = f.readline()
        face_num = int(face_num)
        # img
        img = cv2.imread(os.path.join(train_dir, filename))
        if img is None:
            # NOTE(review): brace-style placeholder passed with a separate
            # arg — most logging APIs expect %-style or .format(); confirm
            # this logger supports "{}" substitution.
            log.warning("error to load image {}", filename)
            continue
        # Read the ground-truth bboxes for this image.
        H, W, C = img.shape
        gt_bbox = []
        for i in range(face_num):
            line = f.readline()
            line = line.split()
            x = int(line[0])
            y = int(line[1])
            # (loop body continues beyond this chunk)
class addGoods:
    """Create a goods item via the open API, supplying environment-specific
    defaults for every omitted field and retrying once on token/store errors."""

    def __init__(self, pid, env='QA'):
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.env = env
        self.pid = pid
        self.get_access_token = GetAccessToken(env=env, pid=pid)
        # Lowercase the env string, e.g. 'QA' -> 'qa'.
        env = env.lower()
        key = env + '_url'
        self.url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='addgoods')
        self.access_token = self.get_access_token.get_ini_access_token()

    def add_goods(self,
                  storeId=None,
                  outerGoodsCode=None,
                  outerSkuCode=None,
                  deliveryTypeIdList=None,
                  title=None,
                  salePrice=None,
                  originalPrice=None,
                  adviseSalePriceMin=None,
                  adviseSalePriceMax=None,
                  goodsImageUrl=None):
        """
        Create a goods item.

        :param storeId: store id
        :param outerGoodsCode: external spu code
        :param outerSkuCode: merchant sku code
        :param deliveryTypeIdList: delivery types, comma-separated
            (1 = same-city express; 2 = city-wide delivery; 3 = both 1 and 2)
        :param title: goods title
        :param salePrice: sale price
        :param originalPrice: market price
        :param adviseSalePriceMin: store sale-price range start
        :param adviseSalePriceMax: store sale-price range end
        :param goodsImageUrl: goods image URL
        :return: (response JSON, goodsId, skuId) on success, otherwise the
            raw response JSON or an error dict
        """
        url = self.url.format(self.path, self.access_token)
        # Environment-specific default pid.
        pid = self.pid
        if self.pid == None:
            if self.env == 'QA':
                pid = 1
            if self.env == 'DEV':
                pid = 17
            # TODO reserved for the prod environment
            if self.env == 'PROD':
                pid = 17
        # print(pid)
        # Environment-specific default storeId.
        if storeId == None:
            if self.env == "QA":
                storeId = 1001
            if self.env == "DEV":
                storeId = 3017
            # TODO reserved for the prod environment
            if self.env == "PROD":
                storeId = 3017
        if outerGoodsCode == None:
            # Auto-generate an spu code from a second-resolution timestamp.
            t = int(time.time())
            d = 'spu' + str(t)
            outerGoodsCode = d
        # Merchant sku code (kept as passed).
        if outerSkuCode != None:
            outerSkuCode = outerSkuCode
        deliveryTypeId = None
        # Resolve the delivery-type ids for this store.
        if deliveryTypeIdList != None:
            if deliveryTypeIdList == '3':
                deliveryTypeId = get_delivery_type(
                    env=self.env,
                    pid=pid,
                    storeId=storeId,
                    deliveryType=int(deliveryTypeIdList))[1]
                if deliveryTypeId == None:
                    return {"status": 103, "message": "当前门店该配送方式不存在"}
                elif len(deliveryTypeId) < 2:
                    return {
                        "status": 104,
                        "message": "当前门店只有一种配送方式,请重新传递配送方式ID"
                    }
                else:
                    deliveryTypeId = deliveryTypeId
            else:
                deliveryType = get_delivery_type(
                    env=self.env,
                    pid=self.pid,
                    storeId=storeId,
                    deliveryType=int(deliveryTypeIdList))[1]
                if deliveryType == None:
                    return {"status": 103, "message": "当前门店该配送方式不存在"}
                else:
                    deliveryTypeId = []
                    deliveryTypeId.append(deliveryType)
        # Environment-specific default delivery type.
        if deliveryTypeIdList == None:
            if self.env == "QA":
                deliveryTypeId = [2]
            if self.env == "DEV":
                deliveryTypeId = [209435]
            # TODO reserved for the prod environment
            if self.env == "PROD":
                deliveryTypeId = [209435]
        if salePrice == None:
            salePrice = 0.01
        # if originalPrice == None:
        #     originalPrice = 1
        if adviseSalePriceMin == None:
            adviseSalePriceMin = 0.01
        if adviseSalePriceMax == None:
            adviseSalePriceMax = 1
        if goodsImageUrl == None:
            goodsImageUrl = "https://image-c.weimobmxd.com/saas-wxbiz/a016cb2de441406289433fd0c71c56bd.png"
        json_data = {
            "storeId": storeId,
            "goods": {
                "b2cGoods": {
                    "deliveryTypeIdList": deliveryTypeId,
                    "b2cGoodsType": 0
                },
                "categoryId": 274,
                "title": title,
                "isMultiSku": 0,
                "outerGoodsCode": outerGoodsCode,
                "goodsTagId": "",
                "goodsDesc": "",
                "goodsTemplateId": -1,
                "isMemberShipDiscount": 0,
                "deductStockType": 1,
                "isCanSell": 1,
                "isAutoCanSell": 0,
                "isAutoForbidSell": 0,
                "startSellTime": None,
                "startForbidTime": None,
                "categoryNameTree": "食品,零食/坚果/特产,其他休闲零食",
                "skuList": [{
                    "outerSkuCode": outerSkuCode,
                    "productType": 1,
                    "singleProductId": 116130117,
                    "combineProduct": {},
                    "salePrice": salePrice,
                    "adviseSalePriceMin": adviseSalePriceMin,
                    "adviseSalePriceMax": adviseSalePriceMax,
                    "originalPrice": originalPrice,
                    "b2cSku": {
                        "weight": None,
                        "volume": None
                    },
                    "isDisabled": False,
                    "editStockNum": 0
                }],
                "selectedGoodsAttrList": [],
                "selectedSaleAttrList": [],
                "goodsVideoUrl": None,
                "goodsVideoImageUrl": None,
                "limitBuyNum": 0,
                "isPutAway": 0,
                "saleChannelType": 3,
                "selectedGoodsPropList": [],
                "selectedInnerGoodsPropList": [],
                "goodsImageUrl": [goodsImageUrl],
                "goodsBrandId": ""
            }
        }
        self.log.info('开始:调用add_goods方法,请求地址为:{0},入参为:{1}'.format(
            url, json_data))
        requests.packages.urllib3.disable_warnings()
        r = requests.post(url=url, json=json_data, verify=False)
        # print(r.json())
        # Token expired: refresh once and retry the same request.
        if r.json()['data'] == 'invalid accesstoken':
            self.log.warning('提示:ini文件中的accesstoken失效,开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # Important: re-read the ini to pick up the fresh token.
            new_access_token = self.get_access_token.get_ini_access_token()
            url = self.url.format(self.path, new_access_token)
            self.log.warning('开始:调用add_goods方法,请求地址为:{0},入参为:{1}'.format(
                url, json_data))
            requests.packages.urllib3.disable_warnings()
            res = requests.post(url=url, json=json_data, verify=False)
            # print(res.json(), url, json_data)
            try:
                goodsId = res.json()['data']['goodsId']
                skuId = res.json()['data']['skuList'][0]['skuId']
                self.log.warning(
                    '结束:调用add_goods方法,返回数据为:{0},返回goodsId为:{1},返回skuId为:{2}'.
                    format(res.json(), goodsId, skuId))
                return res.json(), goodsId, skuId
            except Exception as f:
                # print(f)
                self.log.error('调用新增商品接口失败,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return res.json()
        # Store-not-found error: also retried once with a fresh token.
        elif r.json()['code']['errmsg'] == '根据Pid查询storeId失败,此商家不存在此门店':
            # print(r.json()['code']['errmsg'])
            # return r.json()['code']['errmsg']
            self.log.warning(
                '提示:根据Pid查询storeId失败,此商家不存在此门店,尝试开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # Important: re-read the ini to pick up the fresh token.
            new_access_token = self.get_access_token.get_ini_access_token()
            url = self.url.format(self.path, new_access_token)
            self.log.warning('开始:调用add_goods方法,请求地址为:{0},入参为:{1}'.format(
                url, json_data))
            requests.packages.urllib3.disable_warnings()
            res = requests.post(url=url, json=json_data, verify=False)
            # print(res.json(), url, json_data)
            try:
                goodsId = res.json()['data']['goodsId']
                skuId = res.json()['data']['skuList'][0]['skuId']
                self.log.warning(
                    '结束:调用add_goods方法,返回数据为:{0},返回goodsId为:{1},返回skuId为:{2}'.
                    format(res.json(), goodsId, skuId))
                return res.json(), goodsId, skuId
            except Exception as f:
                # print(f)
                self.log.error('调用新增商品接口失败,错误日志为:{0}'.format(f))
                return {'msg': '根据Pid查询storeId失败,此商家不存在此门店,请检查storeId是否正确'}
        else:
            try:
                goodsId = r.json()['data']['goodsId']
                skuId = r.json()['data']['skuList'][0]['skuId']
                self.log.warning(
                    '结束:调用add_goods方法,返回数据为:{0},返回goodsId为:{1},返回skuId为:{2}'.
                    format(r.json(), goodsId, skuId))
                return r.json(), goodsId, skuId
            except Exception as f:
                # print(f)
                self.log.error('调用新增商品接口失败1,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return r.json()
class GetOrderDetail:
    """Query order details from the goods backend, transparently refreshing
    the access token stored in config.ini when the server rejects it.
    """

    def __init__(self, pid, env='QA'):
        # pid: merchant id forwarded to GetAccessToken; env: 'QA' or 'DEV' etc.
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.env = env
        self.get_access_token = GetAccessToken(env=env, pid=pid)
        # lowercase the env string to build the ini key, e.g. 'qa_url'
        env = env.lower()
        key = env + '_url'
        # self.url is a format template: filled with (path, access_token) below
        self.url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='queryOrderDetail')
        self.access_token = self.get_access_token.get_ini_access_token()

    def get_order_detail(self, orderNo):
        '''
        Fetch the order detail for one order.

        :param orderNo: order id
        :return: response json (dict) from the backend
        '''
        url = self.url.format(self.path, self.access_token)
        json_data = {'orderNo': orderNo}
        self.log.info('开始:调用get_order_detail方法,请求地址为:{0},入参为:{1}'.format(url, json_data))
        requests.packages.urllib3.disable_warnings()
        r = requests.post(url=url, json=json_data, verify=False)
        # access_token in the ini file is no longer valid -> refresh and retry once
        if r.json()['data'] == 'invalid accesstoken':
            # fetch a fresh token and persist it to the ini file
            self.log.warning('提示:ini文件中的accesstoken失效,开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # NOTE: the access_token must be re-read from the ini file here
            new_access_token = self.get_access_token.get_ini_access_token()
            # NOTE(review): this log line uses the *old* url (it is rebuilt with
            # the new token only on the next line) — the logged address is stale.
            self.log.warning('开始:调用get_order_detail方法,请求地址为:{0},入参为:{1}'.format(url, json_data))
            url = self.url.format(self.path, new_access_token)
            requests.packages.urllib3.disable_warnings()
            res = requests.post(url=url, json=json_data, verify=False)
            # print(res.json(), url, json_data)
            try:
                self.log.warning('结束:调用get_order_detail方法,返回数据为:{0}'.format(res.json()))
                return res.json()
            except Exception as f:
                # print(f)
                self.log.error('调用获取商品详情接口失败,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return res.json()
        elif r.json()['code']['errmsg'] == '根据Pid查询storeId失败,此商家不存在此门店':
            # print(r.json()['code']['errmsg'])
            # return r.json()['code']['errmsg']
            # store lookup failed — also handled by refreshing the token and retrying
            self.log.warning('提示:根据Pid查询storeId失败,此商家不存在此门店,尝试开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # NOTE: the access_token must be re-read from the ini file here
            new_access_token = self.get_access_token.get_ini_access_token()
            url = self.url.format(self.path, new_access_token)
            self.log.warning('开始:调用get_order_detail方法,请求地址为:{0},入参为:{1}'.format(url, json_data))
            requests.packages.urllib3.disable_warnings()
            res = requests.post(url=url, json=json_data, verify=False)
            # print(res.json(), url, json_data)
            try:
                self.log.warning('结束:调用get_order_detail方法,返回数据为:{0}'.format(res.json()))
                return res.json()
            except Exception as f:
                # print(f)
                self.log.error('调用获取商品详情接口失败,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return res.json()
        else:
            # token was valid — return the first response as-is
            try:
                self.log.info('结束:调用get_order_detail方法,返回数据为:{0}'.format(r.json()))
                return r.json()
            except Exception as f:
                # print(f)
                self.log.error('调用获取商品详情接口失败1,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return r.json()

    def get_order_item_id_skuNum(self, orderNo):
        '''
        Extract pickingPackageList, storeId and wid from an order's detail.

        :param orderNo: order number
        :return: (pickingPackageList, storeId, wid) tuple, or None on error
            (the except branch logs and falls through without a return)
        '''
        # fetch the full order detail first
        result = self.get_order_detail(orderNo=orderNo)
        try:
            itemList = result['data']['itemList']
            # storeId of this order
            storeId = result['data']['merchantInfo']['storeId']
            # buyer wid of this order
            wid = result['data']['buyerInfo']['wid']
            pickingPackageList = []
            for i in itemList:
                # item id
                itemId = i['id']
                # sku quantity to pick
                pickSkuNum = i['skuNum']
                d = {"itemId": itemId, "pickSkuNum": pickSkuNum}
                pickingPackageList.append(d)
            return pickingPackageList, storeId, wid
        except Exception as f:
            self.log.error('获取订单详情中的字段失败,错误日志为:{0}'.format(f))
while lr_steps and k >= lr_steps[0]: new_lr = trainer.learning_rate * lr_decay lr_steps.pop(0) trainer.set_learning_rate(new_lr) images, targets = train_dataset.getbatch(train_batch) images = images.as_in_context(ctx) targets = targets.as_in_context(ctx) with autograd.record(): cls, box = net(images) cls = cls.reshape(cls.shape[0], cls.shape[1]) box = box.reshape(box.shape[0], box.shape[1]) cls_loss = lossFun.AddClsLoss(cls, targets) box_loss = lossFun.AddRegLoss(box, targets) loss = 3 * cls_loss + box_loss loss.backward() trainer.step(1) if k % 100 == 0: cls_auc = eval.AddClsAccuracy(cls, targets) reg_auc = eval.AddBoxMap(box, targets, 12, 12) log.info( 'iter: {}, cls_loss: {:.4f}, box_loss: {:.4f}, lr: {}, cls_auc: {:.4f}, reg_auc: {:.4f} ' .format(k, cls_loss.asscalar(), box_loss.asscalar(), trainer.learning_rate, cls_auc.asscalar(), reg_auc)) if k % 10000 == 0: net.save_parameters(save + str(k))
f.close() f = open(os.path.join(root_dir, neg_re_dir + "anno_neg.txt")) neg = f.readlines() f.close() f = open(os.path.join(root_dir, part_re_dir + "anno_part.txt")) part = f.readlines() f.close() # npos = len(pos) nneg = len(neg) npart = len(part) log.info("number: pos -> {}, neg -> {}, part -> {}".format(npos, nneg, npart)) # 85:15的比例 for i in range(4): random.shuffle(pos) random.shuffle(neg) random.shuffle(part) # pos = [pos_re_dir + i.strip() + '\n' for i in pos] neg = [neg_re_dir + i.strip() + '\n' for i in neg] part = [part_re_dir + i.strip() + '\n' for i in part] pos_train_num = int(npos * 0.95) neg_train_num = int(nneg * 0.95) part_train_num = int(npart * 0.95)
def collect_bad_url():
    """
    Make bag-of-words (bow) entries for websites in the bad-url list.

    Reads "bad_url_summarize_bow.txt" line by line, resuming from the
    position stored in the "genre_bow" DB queue; for each url it re-fetches
    the page, updates the stored page if the fetch is larger, registers the
    url's genres under their normalized short genres, and stores the bow in
    the URLBow collection.  Failures are appended to a log file and the
    queue position is NOT advanced for the failing entry.
    :return: None
    """
    queue=DBQueue_old("genre_bow")
    #don't trust anything
    summarizer=Summarizer()
    bow=BagOfWords()
    short_genre_to_genre=coll.ShortGenre()
    url_to_bow=coll.URLBow()
    # resume point: line index to continue from
    start_pos=queue.get()
    for c,line in enumerate(open("bad_url_summarize_bow.txt")):
        if c<start_pos:
            continue
        # line format assumed: "<something> <url>:::<error>" — url is token 1
        url=line.split(" ")[1].split(":::")[0]
        try:
            print('New url {} num: {}'.format(url,c))
            url_obj=coll.URLToGenre().select(url=url).find_one()
            # only process entries flagged as original
            if not hasattr(url_obj,"original") or not url_obj["original"]:
                print("Not original")
                continue
            #request page anyways, most of the bad pages are due to bad pagess
            # NOTE(review): unreplace_dot_url is applied twice here — verify
            # whether the double application is intentional or a typo.
            data=Request().get_data(base_util.unreplace_dot_url(base_util.unreplace_dot_url(url_obj["url"])))
            if data is None:
                raise Exception('url {} No has page'.format(url))
            else:
                # replace the stored page when none exists or the fetch is bigger
                if not hasattr(url_obj,"page") or len(data)>len(url_obj["page"]):
                    print("updating data")
                    data=base_util.utf_8_safe_decode(data)
                    if not hasattr(url_obj,"page"):
                        #save page if the new page is significantly bigger than the old one
                        url_obj.save(page=data)
                    else:
                        url_obj.update(page=data)
                    url_obj.reload()
                # sanity check: the stored page must be at least as large as the fetch
                if len(data) > len(url_obj.page):
                    raise Exception("Inconsistency b/w data and page data")
            #url_obj=repair.genre_to_genre_data(url_obj.document)
            #get genre strings
            #register the genre with the short genres for faster retrieval
            genre_string_list=[]
            for g in url_obj.genre:
                normalized_string=base_util.normalize_genre_string(g["genre"])
                genre_string_list.append(normalized_string)
                short_genre_to_genre.select(short_genre=normalized_string).update(upsert=True,add_to_set__genres=g)
            Logger.info("Getting bow rep")
            #get BOW representation of the summarized page text
            # NOTE(review): the non-str fallback decodes url_obj itself, not
            # url_obj.page — looks like it should be utf_8_safe_decode(url_obj.page); confirm.
            bow_dict=bow.get_word_count(summarizer.summarize(url_obj.page if isinstance(url_obj.page,str) else base_util.utf_8_safe_decode(url_obj)))
            # reject pages whose vocabulary is too small to be useful
            if len(bow_dict)<20:
                raise Exception("Words less than 20")
            Logger.info("Update count:"+str(bow_dict))
            #store the url bow in urlbow table (skip if one already exists)
            if not url_to_bow.select(url=url_obj["url"]).find_one():
                url_to_bow.create(url=url_obj["url"],bow=bow_dict,short_genres=genre_string_list)
            else:
                print('Exists bow url number {}'.format(url))
            # advance the resume position only after a fully successful iteration
            queue.increment()
        except Exception as ex:
            Logger.error(url_obj['url']+":::"+str(ex),"C:/Users/Kevin/Desktop/GitHub/Research/Webscraper/bad_url_summarize_bow1.txt")
def __onConnect(self, client, userdata, flags, rc):
    """MQTT on-connect callback: log every connection detail at info level."""
    prefix = 'Connected to Response broker. '
    details = (
        ('Client : ', client),
        ('User data : ', userdata),
        ('Flags : ', flags),
        ('Connection: ', rc),
    )
    for label, value in details:
        Logger.info(prefix + label + str(value))
class GetGoodsDetail:
    """Query goods details from the goods backend, transparently refreshing
    the access token stored in config.ini when the server rejects it.
    """

    def __init__(self, env='QA'):
        self.log = Logger("debug")
        opera = OperationIni(fileName='config.ini', pathName='config')
        self.env = env
        self.get_access_token = GetAccessToken(env=env)
        # lowercase the env string to build the ini key, e.g. 'qa_url'
        env = env.lower()
        key = env + '_url'
        # self.url is a format template: filled with (path, access_token) below
        self.url = opera.read_ini(section='goods', key=key)
        self.path = opera.read_ini(section='goods', key='queryGoodsDetail')
        self.access_token = self.get_access_token.get_ini_access_token()
        # if env == 'QA':
        #     self.access_token = opera.read_ini(section='access_token', key='qa_access_token')
        # if env == 'DEV':
        #     self.access_token = opera.read_ini(section='access_token', key='dev_access_token')

    def get_goods_detail(self, goodsId, storeId=None):
        '''
        Fetch the goods detail (and first skuId) for one goods item.

        :param goodsId: goods id
        :param storeId: optional store id; omitted from the payload when None
        :return: (response json, skuId) on success, or just the response json
            when the skuId cannot be extracted
        '''
        url = self.url.format(self.path, self.access_token)
        # json_data = None
        if storeId == None:
            json_data = {'goodsId': goodsId}
        else:
            json_data = {'goodsId': goodsId, 'storeId': storeId}
        self.log.info('开始:调用get_goods_detail方法,请求地址为:{0},入参为:{1}'.format(
            url, json_data))
        r = requests.post(url=url, json=json_data)
        # access_token in the ini file is no longer valid -> refresh and retry once
        if r.json()['data'] == 'invalid accesstoken':
            # fetch a fresh token and persist it to the ini file
            self.log.warning('提示:ini文件中的accesstoken失效,开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # NOTE: the access_token must be re-read from the ini file here
            new_access_token = self.get_access_token.get_ini_access_token()
            # NOTE(review): this log line uses the *old* url (it is rebuilt with
            # the new token only on the next line) — the logged address is stale.
            self.log.warning(
                '开始:调用get_goods_detail方法,请求地址为:{0},入参为:{1}'.format(
                    url, json_data))
            url = self.url.format(self.path, new_access_token)
            res = requests.post(url=url, json=json_data)
            # print(res.json(), url, json_data)
            try:
                skuId = res.json()['data']['goods']['skuList'][0]['skuId']
                self.log.warning(
                    '结束:调用get_goods_detail方法,返回数据为:{0},返回skuId为:{1}'.format(
                        res.json(), skuId))
                return res.json(), skuId
            except Exception as f:
                # print(f)
                self.log.error('调用获取商品详情接口失败,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return res.json()
        elif r.json()['code']['errmsg'] == '根据Pid查询storeId失败,此商家不存在此门店':
            # print(r.json()['code']['errmsg'])
            # return r.json()['code']['errmsg']
            # store lookup failed — also handled by refreshing the token and retrying
            self.log.warning(
                '提示:根据Pid查询storeId失败,此商家不存在此门店,尝试开始获取新的accesstoken')
            self.get_access_token.set_access_token()
            # NOTE: the access_token must be re-read from the ini file here
            new_access_token = self.get_access_token.get_ini_access_token()
            url = self.url.format(self.path, new_access_token)
            self.log.warning(
                '开始:调用get_goods_detail方法,请求地址为:{0},入参为:{1}'.format(
                    url, json_data))
            res = requests.post(url=url, json=json_data)
            # print(res.json(), url, json_data)
            try:
                skuId = res.json()['data']['goods']['skuList'][0]['skuId']
                self.log.warning(
                    '结束:调用get_goods_detail方法,返回数据为:{0},返回skuId为:{1}'.format(
                        res.json(), skuId))
                return res.json(), skuId
            except Exception as f:
                # print(f)
                self.log.error('调用获取商品详情接口失败,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return res.json()
        else:
            # token was valid — extract the skuId from the first response
            try:
                skuId = r.json()['data']['goods']['skuList'][0]['skuId']
                self.log.info(
                    '结束:调用get_goods_detail方法,返回数据为:{0},返回skuId为:{1}'.format(
                        r.json(), skuId))
                # NOTE(review): the second tuple element re-derives skuId from a
                # fresh r.json() call instead of reusing the local — same value.
                return r.json(), r.json(
                )['data']['goods']['skuList'][0]['skuId']
            except Exception as f:
                # print(f)
                self.log.error('调用获取商品详情接口失败1,错误日志为:{0}'.format(f))
                # return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
                return r.json()
# Get Config app.config.from_object(DEFAULT_CONFIG) if os.getenv('PROJECT_AWESOME_FLASK_SETTINGS'): app.config.from_envvar('PROJECT_AWESOME_FLASK_SETTINGS') # Read LOCAL_DB and PROD from environment variable # (this is set on heroku for production) if os.getenv('LOCAL_DB'): app.config['LOCAL_DB'] = (os.getenv('LOCAL_DB') == "true") if os.getenv('PROD'): app.config['PROD'] = (os.getenv('PROD') == "true") #If we are using the production database if app.config['LOCAL_DB'] == False: Logger.info(" ******** Using the Product DB - be careful! ******** ") # Print current status of the config variables Logger.info("PROD=" + str(app.config['PROD']) + " DEBUG=" + str(app.config['DEBUG']) + " LOCAL_DB=" + str(app.config['LOCAL_DB'])) SITE_DOMAIN = "http://www.goprojectawesome.com" if app.config['PROD'] == False: SITE_DOMAIN = "http://127.0.0.1:5000" # # Add methods to Jinja2 context for creating URLs # def full_url_for(*args, **kwargs): '''Wrapper for url_for that prepends the domain to the path'''
# # # 这里保存成左上角点和右下角点 xmin = ox ymin = oy xmax = xmin + ow ymax = ymin + oh if iou > IOU_POS_THRES: #### 正样本 #path_ = "/home/chengliu/MTCNN/mtcnn-pytorch/dataset/out_img/" + str(global_idx_landmark) + ".png" #cv2.imwrite(path_,out) label_list = [xmin, ymin, xmax, ymax] + landmarks + [-1] label = np.array(label_list, dtype=np.float32) txn_landmark_image.put( "{}".format(global_idx_landmark).encode("ascii"), out.tostring()) txn_landmark_label.put( "{}".format(global_idx_landmark).encode("ascii"), label.tostring()) global_idx_landmark += 1 inner_landmark_idx += 1 log.info("landmark num: {}".format(global_idx_landmark)) if inner_landmark_idx > 1000: txn_landmark_image.commit() txn_landmark_label.commit() txn_landmark_image = env_landmark_image.begin(write=True) txn_landmark_label = env_landmark_label.begin(write=True) inner_landmark_idx = 0 log.info("now commit landmark lmdb") log.info("process done!") txn_landmark_image.commit() txn_landmark_label.commit()
class GetMockUrl:
    """Drive a headless Chrome through the mock-service admin UI to look up
    the ip of the configured mock service.
    """

    def __init__(self, env):
        # env: section name in the ini file — "DEV", "QA" or "PROD"
        self.log = Logger("debug")
        opera = OperationIni()
        chrome_driver = findPath.data_dir(fileName='chromedriver.exe',
                                          pathName='driver')
        base_url = opera.read_ini(section='CONFIG', key='base_url')
        url = None
        # NOTE(review): the three branches are identical — a single
        # `url = base_url + opera.read_ini(section=env, key='url')` would do.
        if env == "DEV":
            url = base_url + opera.read_ini(section=env, key='url')
        if env == "QA":
            url = base_url + opera.read_ini(section=env, key='url')
        if env == "PROD":
            url = base_url + opera.read_ini(section=env, key='url')
        self.userName = opera.read_ini(section='CONFIG', key='userName')
        self.passWord = opera.read_ini(section='CONFIG', key='passWord')
        self.mockServiceName = opera.read_ini(section='CONFIG',
                                              key='mockServiceName')
        chrome_options = Options()
        # run Chrome in headless (no-UI) mode
        chrome_options.add_argument('--headless')
        self.log.info("开始调用webdriver,当前模式为Chrome无界面模式")
        self.d = webdriver.Chrome(executable_path=chrome_driver,
                                  chrome_options=chrome_options)
        self.d.maximize_window()
        print('-------------------------------------------------------')
        print('成功打开谷歌浏览器')
        self.log.info('成功打开谷歌浏览器')
        # open the admin page and wait (implicitly) for it to load
        self.d.get(url)
        self.d.implicitly_wait(30)
        print('-------------------------------------------------------')
        print('成功打开网址:{0}'.format(url))
        self.log.info('成功打开网址:{0}'.format(url))

    def get_mock_url(self):
        """Log in, search for the configured mock service, scrape its address
        from the result table, and return the ip part (text before the colon).
        The browser is quit before returning.
        """
        # fill in the login form and submit
        self.d.find_element_by_xpath(
            "//input[@placeholder='请输入用户名']").send_keys(self.userName)
        self.d.find_element_by_xpath(
            "//input[@placeholder='请输入密码']").send_keys(self.passWord)
        self.d.find_element_by_xpath(
            "//button[@class='ivu-btn ivu-btn-primary ivu-btn-long']").click()
        self.d.implicitly_wait(30)
        print('-------------------------------------------------------')
        print('登录成功')
        self.log.info('登录成功')
        # search for the mock service by name
        self.d.find_element_by_id('searchContent').send_keys(
            self.mockServiceName)
        print('-------------------------------------------------------')
        print('正在查询对应服务')
        self.log.info("正在查询服务:{0}".format(self.mockServiceName))
        self.d.find_element_by_id('searchContent').send_keys(Keys.ENTER)
        # second row, second column of the result table holds "ip:port"
        u = self.d.find_element_by_xpath(
            "//*[@id='table_o']/tbody/tr[2]/td[2]/a").text
        # keep only the ip segment before the colon
        str_url = (" ".join(u.split(':')[:1]))
        self.log.info("查询到当前服务ip为:{0}".format(str_url))
        # print(str_url, type(str_url))
        self.d.quit()
        return str_url


# g = GetMockUrl()
# g.get_mock_url()
def update_url(url, new_url):
    """Rewrite a stored URL in the URLToGenre collection.

    Both arguments are normalized with replace_dot_url before use; returns
    whatever URLToGenre's update() returns.
    """
    url, new_url = replace_dot_url(url), replace_dot_url(new_url)
    Logger.info('Updating {} to {}'.format(url, new_url))
    return URLToGenre.objects(url=url).update(url=new_url)
class updateGoodsPrice:
    """Update a goods item's price via the goods backend, refreshing the
    access token stored in config.ini when the server rejects it.
    """

    def __init__(self, env='QA'):
        self.log = Logger("debug")
        self.opera = OperationIni(fileName='config.ini', pathName='config')
        # used to resolve the goods' first skuId before updating its price
        self.get_skuId = GetGoodsDetail(env=env)
        self.get_access_token = GetAccessToken(env=env)
        # lowercase the env string to build the ini key, e.g. 'qa_url'
        env = env.lower()
        key = env + '_url'
        # base_url is a format template: filled with (path, access_token) below
        self.base_url = self.opera.read_ini(section='goods', key=key)
        self.path = self.opera.read_ini(section='goods', key='updatePrice')
        self.access_token = self.get_access_token.get_ini_access_token()

    def update_goods_price(self, storeId, goodsId, originalPrice, salePrice):
        '''
        Update a goods item's price.

        :param storeId: store id
        :param goodsId: goods id
        :param originalPrice: market (original) price
        :param salePrice: merchant unified sale price
        :return: response json, or an error dict when any step raises
        '''
        url = self.base_url.format(self.path, self.access_token)
        # resolve the skuId first; any failure falls through to the except below
        try:
            self.skuId = self.get_skuId.get_goods_detail(goodsId, storeId)[1]
            json_data = {
                'goodsId': goodsId,
                'skuList': [{
                    'skuId': self.skuId,
                    'originalPrice': originalPrice,
                    'salePrice': salePrice,
                }],
                'storeId': storeId
            }
            self.log.info('开始:调用update_goods_price方法,请求地址为:{0},入参为:{1}'.format(
                url, json_data))
            r = requests.post(url=url, json=json_data)
            # access_token in the ini file is no longer valid -> refresh and retry once
            if r.json()['data'] == 'invalid accesstoken':
                self.log.warning('提示:ini文件中的accesstoken失效,开始获取新的accesstoken')
                # fetch a fresh token and persist it to the ini file
                self.get_access_token.set_access_token()
                # NOTE: the access_token must be re-read from the ini file here
                new_access_token = self.get_access_token.get_ini_access_token()
                url = self.base_url.format(self.path, new_access_token)
                self.log.warning(
                    '开始:调用update_goods_price方法,请求地址为:{0},入参为:{1}'.format(
                        url, json_data))
                res = requests.post(url=url, json=json_data)
                self.log.warning('结束:调用update_goods_price方法,返回数据为:{0}'.format(
                    res.json()))
                return res.json()
            else:
                self.log.info('结束:调用update_goods_price方法,返回数据为:{0}'.format(
                    r.json()))
                return r.json()
        except Exception as f:
            self.log.error('修改商品失败,错误日志为:{0}'.format(f))
            return {'msg': '底层接口请求失败,请检查所传字段的数据是否正确'}
class BaseModel:
    """Abstract base class of a model backed by a tensorflow graph.

    Owns the session/saver lifecycle, the per-instance on-disk layout
    (visual results, source snapshot, summaries, checkpoints) and the
    metadata dict persisted alongside the checkpoint.  Subclasses implement
    the build_* hooks invoked by build().
    """
    AUTHOR = 'demetoir'

    def __str__(self):
        # "<author>_<concrete class name>"; also used as the instance-id prefix
        return "%s_%s" % (self.AUTHOR, self.__class__.__name__)

    def __init__(self, input_shapes=None, params=None, logger_path=None,
                 root_path=ROOT_PATH):
        """create instance of AbstractModel

        :type input_shapes: dict
        :param input_shapes: input shapes for the tensor placeholders
        :param params: hyper-parameter dict consumed by build_hyper_parameter
        :type logger_path: str
        :param logger_path: path for log file;
            if logger_path is None, log only to stdout (default LOG_PATH)
        :param root_path: project root path kept on the instance
        """
        self.root_path = root_path
        if logger_path is None:
            self.log = Logger(self.__class__.__name__, LOG_PATH)
        else:
            self.log = Logger(self.__class__.__name__, logger_path)

        # tensorflow runtime state; populated by open_session()/build()
        self.sess = None
        self.saver = None
        self.summary_writer = None
        self.is_built = False

        # gen instance id (unique per construction via time stamp)
        self.input_shapes = input_shapes
        self.params = params
        self.id = "_".join([self.__str__(), time_stamp()])

        # per-instance folder layout
        self.instance_path = os.path.join(INSTANCE_PATH, self.id)
        self.instance_visual_result_folder_path = os.path.join(
            self.instance_path, VISUAL_RESULT_FOLDER)
        self.instance_source_folder_path = os.path.join(
            self.instance_path, 'src_code')
        self.instance_summary_folder_path = os.path.join(
            self.instance_path, 'summary')
        self.instance_class_name = self.__class__.__name__
        self.instance_source_path = os.path.join(
            self.instance_source_folder_path, self.id + '.py')
        self.metadata_path = os.path.join(self.instance_path, 'instance.meta')
        self.save_folder_path = os.path.join(self.instance_path, 'check_point')
        self.check_point_path = os.path.join(self.save_folder_path,
                                             'instance.ckpt')

        # everything load_metadata() needs to restore this instance later
        self.metadata = {
            MODEL_METADATA_KEY_INSTANCE_ID: self.id,
            MODEL_METADATA_KEY_INSTANCE_PATH: self.instance_path,
            MODEL_METADATA_KEY_INSTANCE_VISUAL_RESULT_FOLDER_PATH:
                self.instance_visual_result_folder_path,
            MODEL_METADATA_KEY_INSTANCE_SOURCE_FOLDER_PATH:
                self.instance_source_folder_path,
            MODEL_METADATA_KEY_INSTANCE_SOURCE_PATH:
                self.instance_source_path,
            MODEL_METADATA_KEY_INSTANCE_SUMMARY_FOLDER_PATH:
                self.instance_summary_folder_path,
            MODEL_METADATA_KEY_INSTANCE_CLASS_NAME: self.instance_class_name,
            MODEL_METADATA_KEY_METADATA_PATH: self.metadata_path,
            MODEL_METADATA_KEY_CHECK_POINT_PATH: self.check_point_path,
            MODEL_METADATA_KEY_SAVE_FOLDER_PATH: self.save_folder_path,
            MODEL_METADATA_KEY_PARAMS: self.params,
            MODEL_METADATA_KEY_INPUT_SHAPES: self.input_shapes,
        }

    def __del__(self):
        # TODO this del need hack
        try:
            self.close_session()
            # reset tensorflow graph
            tf.reset_default_graph()
            del self.sess
            del self.root_path
            del self.log
        except BaseException:
            # interpreter may already be tearing down; nothing sane to do here
            pass

    @property
    def hyper_param_key(self):
        """Keys copied from params into attributes by build_hyper_parameter."""
        return []

    def setup_model(self):
        """Create the on-disk folder layout for this instance."""
        self.log.debug('init directory')
        setup_directory(self.instance_path)
        setup_directory(self.instance_visual_result_folder_path)
        setup_directory(self.instance_source_folder_path)
        setup_directory(self.instance_summary_folder_path)
        setup_directory(self.save_folder_path)

    def load_metadata(self, path):
        """Restore instance attributes from the metadata json at *path*."""
        self.metadata = load_json(path)
        self.id = self.metadata[MODEL_METADATA_KEY_INSTANCE_ID]
        self.instance_path = self.metadata[MODEL_METADATA_KEY_INSTANCE_PATH]
        self.instance_visual_result_folder_path = self.metadata[
            MODEL_METADATA_KEY_INSTANCE_VISUAL_RESULT_FOLDER_PATH]
        self.instance_source_path = self.metadata[
            MODEL_METADATA_KEY_INSTANCE_SOURCE_PATH]
        self.instance_class_name = self.metadata[
            MODEL_METADATA_KEY_INSTANCE_CLASS_NAME]
        self.instance_summary_folder_path = self.metadata[
            MODEL_METADATA_KEY_INSTANCE_SUMMARY_FOLDER_PATH]
        self.save_folder_path = self.metadata[
            MODEL_METADATA_KEY_SAVE_FOLDER_PATH]
        self.check_point_path = self.metadata[
            MODEL_METADATA_KEY_CHECK_POINT_PATH]
        self.params = self.metadata[MODEL_METADATA_KEY_PARAMS]
        self.input_shapes = self.metadata[MODEL_METADATA_KEY_INPUT_SHAPES]

    def save_metadata(self, path):
        """Dump the metadata dict as json to *path*."""
        self.log.debug('dump metadata')
        dump_json(self.metadata, path)

    def open_session(self):
        """Open a tf session, create a Saver and init all variables.

        :raise Exception: if a session is already open
        """
        if self.sess is None:
            self.sess = tf.Session()
            self.saver = tf.train.Saver()
            self.sess.run(tf.global_variables_initializer())
            # self.summary_writer = tf.summary.FileWriter(self.instance_summary_folder_path, self.sess.graph)
        else:
            raise Exception("fail to open tf session")

    def close_session(self):
        """Close the session and drop the saver (safe to call repeatedly)."""
        if self.sess is not None:
            self.sess.close()
            # BUG FIX: self.sess was left pointing at the closed session, so a
            # later open_session() always raised; clear it to allow reopening.
            self.sess = None
        if self.saver is not None:
            self.saver = None
        if self.summary_writer is not None:
            pass
            # self.summary_writer.close()

    def build(self):
        """Build the full tensor graph by invoking the build_* hooks in order.

        Sets is_built on success.
        :raise ModelBuildFailError: if any hook raises (original traceback is
            logged before re-raising)
        """
        try:
            with tf.variable_scope(str(self.id)):
                with tf.variable_scope("misc_ops"):
                    self.log.debug("build_misc_ops")
                    self.build_misc_ops()

                with tf.variable_scope("hyper_parameter"):
                    self.log.debug('build_hyper_parameter')
                    # declare default attrs first, then overwrite from params
                    self.hyper_parameter()
                    self.build_hyper_parameter(self.params)

                self.log.debug('build_input_shapes')
                if self.input_shapes is None:
                    raise AttributeError("input_shapes not feed")
                self.build_input_shapes(self.input_shapes)

                self.log.debug('build_main_graph')
                self.build_main_graph()

                with tf.variable_scope('loss_function'):
                    self.log.debug('build_loss_function')
                    self.build_loss_function()

                with tf.variable_scope('train_ops'):
                    self.log.debug('build_train_ops')
                    self.build_train_ops()

                with tf.variable_scope('summary_ops'):
                    self.log.debug('build_summary_ops')
                    self.build_summary_ops()
        except Exception:
            # log the traceback of the underlying failure, then wrap it
            exc_traceback = sys.exc_info()[2]
            self.log.error("\n", "".join(traceback.format_tb(exc_traceback)))
            raise ModelBuildFailError("ModelBuildFailError")
        else:
            self.is_built = True
            self.log.info("build success")

    def build_input_shapes(self, input_shapes):
        """load input shapes for tensor placeholder

        :type input_shapes: dict
        :param input_shapes: input shapes for tensor placeholder
        :raise NotImplementedError: if not implemented
        """
        raise NotImplementedError

    def build_hyper_parameter(self, params=None):
        """load hyper parameter for model

        Copies every key listed in hyper_param_key from *params* onto the
        instance; does nothing when *params* is None.
        :param params: hyper-parameter dict
        """
        if params is not None:
            for key in self.hyper_param_key:
                self.__dict__[key] = params[key]

    def build_main_graph(self):
        """load main tensor graph

        :raise NotImplementedError: if not implemented
        """
        raise NotImplementedError

    def build_loss_function(self):
        """load loss function of model

        :raise NotImplementedError: if not implemented
        """
        raise NotImplementedError

    def build_misc_ops(self):
        """Create global step/epoch counters and their increment ops."""
        self.global_step = tf.get_variable(
            "global_step", shape=1, initializer=tf.zeros_initializer)
        self.op_inc_global_step = tf.assign(
            self.global_step, self.global_step + 1, name='op_inc_global_step')
        self.global_epoch = tf.get_variable(
            "global_epoch", shape=1, initializer=tf.zeros_initializer)
        # BUG FIX: this op used to be assigned to self.op_inc_global_step
        # (clobbering the step-increment op above) and assigned
        # global_step + 1 into global_epoch; it now has its own attribute
        # and increments global_epoch by 1.
        self.op_inc_global_epoch = tf.assign(
            self.global_epoch, self.global_epoch + 1,
            name='op_inc_global_epoch')

    def build_train_ops(self):
        """Load train operation of model

        :raise NotImplementedError: if not implemented
        """
        raise NotImplementedError

    def build_summary_ops(self):
        """load summary operation for tensorboard

        Optional hook; default is a no-op.
        """
        pass

    def write_summary(self, sess=None, iter_num=None, dataset=None,
                      summary_writer=None):
        """write summary of model for tensorboard

        :type sess: Session object for tensorflow.Session
        :type iter_num: int
        :type dataset: dataset_handler.AbstractDataset
        :type summary_writer: tensorflow.summary.FileWriter
        :param sess: session object for tensorflow
        :param iter_num: current iteration number
        :param dataset: dataset for train model
        :param summary_writer: file writer for tensorboard summary

        Optional hook; default is a no-op.
        """
        pass

    def hyper_parameter(self):
        # declare default hyper-parameter attributes; subclasses extend this
        self.batch_size = None
        pass

    def save(self):
        """Persist metadata and checkpoint; returns the instance path."""
        self.setup_model()
        self.save_metadata(self.metadata_path)

        if self.sess is None:
            self.open_session()
        self.saver.save(self.sess, self.check_point_path)

        self.log.info("saved at {}".format(self.instance_path))
        return self.instance_path

    def load(self, path):
        """Restore metadata, rebuild the graph and load the checkpoint.

        :param path: instance folder containing 'instance.meta'
        """
        path = os.path.join(path, 'instance.meta')
        self.load_metadata(path)
        self.build()
        # make sure any stale session is replaced by a fresh one
        self.close_session()
        self.open_session()
        self.saver.restore(self.sess, self.check_point_path)

    def get_tf_values(self, fetches, feet_dict):
        """Run *fetches* in the open session with *feet_dict* and return them."""
        # BUG FIX: the run result used to be computed and discarded
        return self.sess.run(fetches, feet_dict)

    def if_not_ready_to_train(self):
        """Lazily build the graph and open a session before training."""
        if not self.is_built:
            self.build()
        if self.sess is None:
            self.open_session()