def _first_present(entity, keys):
    """Return ``(True, entity[key])`` for the first of *keys* found in *entity*.

    Returns ``(False, None)`` when none of the keys is present.
    """
    for key in keys:
        if key in entity:
            return True, entity[key]
    return False, None


def build_score_dict(jsonlistfile):
    """Build a nested ``{text_id: {score_id: score}}`` dict from score files.

    *jsonlistfile* is a text file holding one JSON-file path per line.  For
    each path (split on ``/``) the text id is the two components preceding the
    parent directory joined by ``_``, and the time stamp is the last eight
    characters of the parent directory name.  Every non-blank line of a JSON
    file is parsed as one JSON object that carries an audio name
    (``AudioName`` or ``audioName``) and a score (``Score`` or ``score``).
    The score is stored as a float under ``"<time_stamp>_<audio_id>"``.

    Records missing the audio name or the score are reported through
    ``log.Error`` and skipped.  A repeated score id is reported and then
    overwritten by the later value.

    Returns:
        dict mapping text id to a dict of score id -> float score.
    """
    score_dict = {}
    # ``with`` closes both files even if parsing fails part-way through.
    with open(jsonlistfile) as list_file:
        for line in list_file:
            json_path = line.strip()
            if not json_path:
                # A blank line would otherwise become open("").
                continue
            fields = json_path.split("/")
            text_id = "_".join(fields[-4:-2])
            time_stamp = fields[-2][-8:]
            text_scores = score_dict.setdefault(text_id, {})
            with open(json_path) as json_file:
                for json_item in json_file:
                    if not json_item.strip():
                        continue
                    json_entity = json.loads(json_item.strip())
                    has_audio, audio_name = _first_present(
                        json_entity, ("AudioName", "audioName"))
                    has_score, raw_score = _first_present(
                        json_entity, ("Score", "score"))
                    if not has_audio:
                        log.Error("bad json file:%s" % json_item)
                    if not has_score:
                        log.Error("bad json file:%s" % json_item)
                    if not (has_audio and has_score):
                        # If log.Error returns, do not fall through with an
                        # undefined (or previous record's) audio id / score.
                        continue
                    score_id = time_stamp + "_" + str(audio_name)
                    if score_id in text_scores:
                        log.Error("repeated score_id: %s for text_id: %s" %
                                  (score_id, text_id))
                    text_scores[score_id] = float(raw_score)
    return score_dict
def _send(self):
    """Build the message and send it over a connection from the parent.

    The message comes from ``self._MakeMessage()`` and the connection from
    ``self.m_parent.Login()``.  The connection is always ``quit()`` once it
    has been obtained, including when ``sendmail`` raises, so a failed send
    does not leave the session open.

    Returns:
        True when the mail was handed to the server, False on any failure
        (the failure is reported through ``log.Error``).
    """
    try:
        message = self._MakeMessage()
        smtpObj = self.m_parent.Login()
        try:
            smtpObj.sendmail(self.m_sender, self.m_receive,
                             message.as_string())
        finally:
            # Close the session on both the success and the failure path.
            smtpObj.quit()
    except smtplib.SMTPException as e:
        log.Error("send mail false", e)
        return False
    except BaseException as e:
        # Catch-all kept so this method never raises, as before.
        log.Error("sned mail false by other reason", e)
        return False
    return True
def get_community_info(cityName, keyword, filter_word=None):
    """Search communities by name within a city and collect their listings.

    Args:
        cityName: City to search in.
        keyword: Community name to search for.
        filter_word: Optional filter keyword; a hit is skipped unless this
            string occurs in its ``region`` field.

    Returns:
        Dict mapping community id to a record with the keys ``city``,
        ``name``, ``id``, ``region``, ``house_url_list`` and ``house_data``.
        The dict is empty when the request fails, when the API reports a
        non-zero ``errno``, or when no data is returned.
    """
    query = {
        "cityName": cityName,
        "channel": "xiaoqu",
        "keyword": keyword,
        "query": keyword,
    }
    url = url_encode("https://ajax.api.ke.com/sug/headerSearch", query)
    result, _ = get_url(url, session=g_session)
    result_list = {}
    if is_not_ok(result):
        log.Error("get_community_info url false", cityName, keyword,
                  result.status_code)
        # Return the same empty mapping as the other failure paths; this
        # branch used to return None, unlike every other exit.
        return result_list
    result_data = js2py_val(result.content)
    if result_data["errno"] != 0:
        log.Error("get_community_info not ok", cityName, keyword, keyword)
        return result_list
    if not result_data["data"]:
        log.Waring("get_community_info data is nil", cityName, result)
        return result_list
    for item in result_data["data"]["result"]:
        if filter_word and filter_word not in item["region"]:
            log.Info("get_community_info ingore by filter_word", cityName,
                     keyword, item)
            continue
        new_data = {
            "city": cityName,
            "name": item["text"],
            "id": item["id"],
            "region": item["region"],
            "house_url_list": get_house_list(item["id"]),
            "house_data": {},
        }
        result_list[new_data["id"]] = new_data
    return result_list
def set_wav(wav_dict, score_dict):
    """Attach every scored wav of a text id to its entry in *wav_dict*.

    Args:
        wav_dict: mapping of text id -> object exposing
            ``setWavList(wav_id, score)``.
        score_dict: mapping of text id -> ``{wav_id: score}``.

    A text id that has no entry in *score_dict* is reported through
    ``log.Error`` and skipped.
    """
    for text_id in wav_dict:
        if text_id not in score_dict:
            log.Error("we don't have audio for the text_id:%s" % text_id)
            # If log.Error returns, indexing score_dict would raise KeyError.
            continue
        for wav_id, score in score_dict[text_id].items():
            wav_dict[text_id].setWavList(wav_id, score)
def validatePhonePosition(self):
    """Check each entry of ``self.phone_position``.

    An entry whose first element is greater than its second is reported with
    ``log.Error``.  An entry whose two elements are equal is reported with
    ``log.Warn``, together with the file id and the whole position list.
    """
    for span in self.phone_position:
        first, second = span[0], span[1]
        if first > second:
            log.Error("phone position error")
            continue
        if first == second:
            log.Warn("this phone only has one frame: " + self.__file_id +
                     " " + str(self.phone_position))
def GetPlatform():
    """Return string describing current OS platform.

    Returns ``PLATFORM_OSX`` when ``platform.mac_ver()`` reports a release,
    ``PLATFORM_LINUX`` when ``platform.system()`` is ``'Linux'``, and
    otherwise logs an error and returns ``PLATFORM_DEFAULT``.
    """
    if platform.mac_ver()[0]:
        return PLATFORM_OSX
    # platform.dist() was removed in Python 3.8; platform.system() is
    # available on both Python 2 and Python 3.
    system_name = platform.system()
    if system_name == 'Linux':
        return PLATFORM_LINUX
    # Report the detected system name; the message used to format the
    # builtin ``type`` by mistake.
    log.Error('Error: Unknown platform type: %s' % system_name)
    return PLATFORM_DEFAULT
def set_wav(wav_dict, score_dict, exclude_dict):
    """Attach the non-excluded scored wavs of each text id to *wav_dict*.

    Args:
        wav_dict: mapping of text id -> object exposing
            ``setWavList(wav_id, score)``.
        score_dict: mapping of text id -> ``{wav_id: score}``.
        exclude_dict: mapping of text id -> collection of wav ids to skip.

    ``log.Error`` is called when a text id has no scores, or when its
    exclusion collection is as long as its score dict.  A text id without
    scores is then skipped.
    """
    for text_id in wav_dict:
        has_scores = text_id in score_dict
        if (not has_scores) or (
                len(exclude_dict[text_id]) == len(score_dict[text_id])):
            log.Error("we don't have audio for the text_id:%s" % text_id)
        if not has_scores:
            # If log.Error returns, indexing score_dict would raise KeyError.
            continue
        excluded = exclude_dict[text_id]
        for wav_id, score in score_dict[text_id].items():
            if wav_id not in excluded:
                wav_dict[text_id].setWavList(wav_id, score)
def innetProduct_dot(matrix_a, matrix_b):
    """Return the row-wise dot products of two 2-D arrays.

    Element ``i`` of the result is ``np.dot(matrix_a[i, :], matrix_b[i, :])``.
    The loop runs over the rows of *matrix_a*; a differing row count is
    reported through ``log.Error`` first.
    """
    rows_a = matrix_a.shape[0]
    rows_b = matrix_b.shape[0]
    if rows_a != rows_b:
        log.Error("the lenght of two matrixes are not same")
    products = [
        np.dot(matrix_a[row, :], matrix_b[row, :]) for row in range(rows_a)
    ]
    return np.array(products)
def getWav1(self, lowscore, highscore, num):
    """Randomly pick *num* wav ids whose score lies in a score window.

    The window is the open interval ``(lowscore, highscore)``.  While fewer
    than *num* wavs match, it is widened by 2 on each side and the search is
    repeated.  Widening stops once every score that any finite window can
    match has been collected, so the search terminates even when the list
    holds fewer than *num* wavs.

    Args:
        lowscore: exclusive lower bound of the initial window.
        highscore: exclusive upper bound of the initial window.
        num: number of wav ids to return.

    Returns:
        A list of *num* wav ids drawn with ``random.sample``.

    Raises:
        ValueError: from ``random.sample`` when fewer than *num* wavs are
            available and ``log.Error`` returns.
    """
    if self.__wav_list is None:
        log.Error("wav-score list is null")
    # Only finite scores can ever satisfy the strict window comparison.
    infinity = float("inf")
    reachable = sum(1 for score in self.__wav_list.values()
                    if -infinity < score < infinity)
    wav_id_list = []
    attempts = 0
    while len(wav_id_list) < num:
        wav_id_list = [wav_id for wav_id, score in self.__wav_list.items()
                       if score > lowscore and score < highscore]
        lowscore -= 2
        highscore += 2
        attempts += 1
        if attempts > 1:
            log.Log("retry %d times for sentence %s" %
                    (attempts - 1, self.__text_id))
        if len(wav_id_list) >= reachable:
            # Nothing more can be found by widening; without this exit the
            # loop never ended when fewer than num wavs exist.
            break
    if len(wav_id_list) < num:
        log.Error(
            "not enough wav file meet the requirment of score, has: %d, require: %d"
            % (len(wav_id_list), num))
    return random.sample(wav_id_list, num)
def RunTask(self):
    """Run every runnable task once and drop the ones that are over.

    Each task in ``self.m_task`` whose ``CanRun()`` is true is executed with
    ``Run()``.  A task that then reports ``IsOver()`` is logged and removed
    from ``self.m_task`` after the pass, so the list is not modified while it
    is being iterated.
    """
    finished = []
    for task in self.m_task:
        if task.CanRun():
            task.Run()
            if task.IsOver():
                log.Error("task over", task)
                finished.append(task)
    for task in finished:
        self.m_task.remove(task)
def getSamplingFunction(self, sampling_type):
    """Return the sampling method registered under *sampling_type*.

    Known names are ``"median"``, ``"mean"``, ``"lastframe"``, ``"uniform"``
    and ``"uniform_mean"``.  For any other name an error is logged and, if
    ``log.Error`` returns, ``None`` is returned.
    """
    sampling_functions = {
        "median": self.medianSampling,
        "mean": self.meanSampling,
        "lastframe": self.lastframeSampling,
        "uniform": self.uniformSampling,
        "uniform_mean": self.uniformMeanSampling,
    }
    # ``in`` works on Python 2 and 3; dict.has_key exists only on Python 2.
    if sampling_type in sampling_functions:
        return sampling_functions[sampling_type]
    log.Error("No such kind of sampling function")
    return None
def getWav2(self, lowscore, highscore):
    """Return every wav id whose score lies strictly inside a score window.

    The window is the open interval ``(lowscore, highscore)``.  A warning is
    logged when fewer than 300 wavs match; the matches are returned either
    way.
    """
    if self.__wav_list == None:
        log.Error("wav-score list is null")
    wav_id_list = [
        wav_id for wav_id, score in self.__wav_list.items()
        if score > lowscore and score < highscore
    ]
    found = len(wav_id_list)
    if found < 300:
        log.Warn(
            "not enough wav file meet the requirment of score, has: %d, require: %d"
            % (found, 300))
    return wav_id_list
def Run(self):
    """Fire the trigger, execute the task function and update the status.

    ``self.m_func`` is called with the task itself followed by
    ``self.m_args``.  If the trigger or the function raises, the status
    becomes ``CTask.__Over`` and the failure is logged.  On success a
    run-once task becomes ``CTask.__Over`` and a run-forever task becomes
    ``CTask.__None``; any other task keeps its status.
    """
    try:
        self.m_trigger.Trigger()
        self.m_func(self, *self.m_args)
    except BaseException as exc:
        self.m_status = CTask.__Over
        logging.exception(exc)
        log.Error("run task fail", str(self))
        return
    # Success path only: the status checks stay outside the try so that an
    # error raised by them is not swallowed.
    if self.IsRunOnce():
        self.m_status = CTask.__Over
    elif self.IsRunForver():
        self.m_status = CTask.__None
def KLdivergence_dot(matrix_a, matrix_b, symmetrized=1):
    """Return the row-wise KL divergence between two 2-D arrays.

    For row ``i`` the forward term is
    ``dot(a_i, log a_i) - dot(a_i, log b_i)``.  When *symmetrized* is truthy
    the result is the mean of the forward term and the same term with the
    roles of the two matrices swapped; otherwise it is the forward term
    alone.  A differing row count is reported through ``log.Error``; the loop
    runs over the rows of *matrix_a*.
    """
    rows_a = matrix_a.shape[0]
    rows_b = matrix_b.shape[0]
    if rows_a != rows_b:
        log.Error("the lenght of two matrixes are not same")
    log_a = np.log(matrix_a)
    log_b = np.log(matrix_b)
    divergences = []
    for row in range(rows_a):
        p, q = matrix_a[row, :], matrix_b[row, :]
        log_p, log_q = log_a[row, :], log_b[row, :]
        forward = np.dot(p, log_p) - np.dot(p, log_q)
        if not symmetrized:
            divergences.append(forward)
            continue
        backward = np.dot(q, log_q) - np.dot(q, log_p)
        divergences.append(0.5 * (forward + backward))
    return np.array(divergences)
def MakeArgumentList(args):
    """Normalise *args* into a flat list of argument values.

    *args* may be ``None`` (no arguments), a single string, or a list.  List
    entries are either strings, kept as they are, or
    ``(value, allowed_platform)`` tuples, whose value is kept only when
    ``IsPlatform(allowed_platform)`` is true.  Entries of any other type are
    reported through ``log.Error`` and dropped.
    """
    if args is None:
        candidates = []
    elif isinstance(args, str):
        candidates = [args]
    else:
        assert isinstance(args, list)
        candidates = args

    accepted = []
    for entry in candidates:
        if isinstance(entry, str):
            accepted.append(entry)
            continue
        if not isinstance(entry, tuple):
            log.Error('Unknown argument type')
            continue
        value, allowed_platform = entry
        if IsPlatform(allowed_platform):
            accepted.append(value)
    return accepted
def thread_spike_fund(threadobj, *fund_list):
    """Fetch and save each fund in *fund_list* and aggregate the results.

    Funds whose type is not in ``__filter_stop_type``, or whose
    ``new_worth_ratio`` is not a float, are skipped without being counted.
    For every remaining fund the occurrences of each held stock are tallied
    and the fund's yield record is stored.  Any exception raised while
    handling a fund is logged and counted as an error.  Progress is logged
    every 50 funds.

    Returns:
        ``{"ok": <count>, "error": <count>,
        "data": {"stock": <stock -> count>, "yeild": <code -> record>}}``.
    """
    total = len(fund_list)
    succeeded = 0
    failed = 0
    stock_total = {}
    fund_yeild = {}
    for position, code in enumerate(fund_list, 1):
        if position % 50 == 0:
            log.Info("thread_spike_fund", threadobj.getName(), position,
                     total - position)
        try:
            data = fund_api.spiker_fund_and_save(code)
            base_data = data["base"]
            if base_data["type"] not in __filter_stop_type:
                continue
            if not tools.is_float(base_data["new_worth_ratio"]):
                continue
            for stock_code, _ in base_data["stock"].items():
                stock_total[stock_code] = stock_total.get(stock_code, 0) + 1
            fund_yeild[code] = {
                "now": base_data["new_worth_ratio"],
                "history": data["data"]["yield"],
                "name": data["name"],
            }
            succeeded += 1
        except BaseException as error:
            log.Error("spiker fund false", threadobj.getName(), code, error)
            failed += 1
            continue
    return {
        "ok": succeeded,
        "error": failed,
        "data": {
            "stock": stock_total,
            "yeild": fund_yeild,
        },
    }
def get_house_info(url, house_data):
    """Scrape one house detail page and store the result in *house_data*.

    The house id is taken from the first group of ``__pHouseID`` matched
    against *url*.  The page is fetched with ``get_url`` and parsed with
    ``etree.HTML``; the fields found are collected in a dict that is stored
    as ``house_data[<house id>]``.

    Args:
        url: URL of the house detail page.
        house_data: dict that receives the scraped record, keyed by house id.

    Returns:
        None in every case.  When the URL carries no house id or the response
        status is not 200, nothing is stored in *house_data*.
    """

    def get_total(data, htree):
        # Reads the text of the "total" span inside the "overview" div and
        # stores it as a float under the "价格" key (literally "price").
        ls = htree.xpath(
            './/div[@class="overview"]//span[@class="total"]/text()')
        if len(ls) == 0 or not tools.is_float(ls[0]):
            log.Waring("get_house_info -> get_total false")
            return
        data["价格"] = tools.tofloat(ls[0])

    def get_info(data, htree):
        # Basic attributes
        def get_info2(key):
            # ``key`` is the class of the div whose <li> items are read; it is
            # called below with "base" and "transaction".
            parttern = './div[@data-component="baseinfo"]//div[@class="introContent"]//div[@class="%s"]//ul//li' % (
                key)
            ls = htree.xpath(parttern)
            if len(ls) == 0:
                log.Waring("get_house_info -> get_info -> get_info2 false", key)
                return
            d = {}
            for li in ls:
                # The <span> text supplies the field name.
                ls1 = li.xpath('./span/text()')
                if len(ls1) > 1 and trim_str(ls1[0]) == "抵押信息":
                    # Special handling for this entry ("抵押信息", literally
                    # "mortgage information"): the value is taken from the
                    # remaining <span> texts.
                    ls2 = ls1[1:]
                else:
                    if key == "transaction":
                        # Transaction attributes: value is read from <a> text.
                        ls2 = li.xpath('.//a/text()')
                    else:
                        ls2 = li.xpath('./text()')
                if len(ls1) == 0 or len(ls2) == 0:
                    log.Waring("get_house_info -> get_info base false", ls1, ls2)
                    continue
                k = trim_str(ls1[0])
                v = trim_str(ls2[0])
                # The two area fields ("建筑面积" / "套内面积") have the "㎡"
                # unit stripped and are converted with tools.tofloat.
                if k == "建筑面积" or k == "套内面积":
                    v = tools.tofloat(v.replace("㎡", ""), 5)
                data[k] = v
            #data[key] = d

        get_info2("base")
        get_info2("transaction")
        # "均价" (literally "average price") is price divided by "建筑面积";
        # it is only computed when both fields were found.
        if "价格" in data and "建筑面积" in data:
            data["均价"] = tools.tofloat(
                float(data["价格"]) / float(data["建筑面积"]), 5)

    r = re.match(__pHouseID, url)
    if not r:
        log.Error("get_house_info no hid", url)
        return None
    house_info = {
        "id": r.groups()[0],
    }
    result, _ = get_url(url, session=g_session)
    if result.status_code != 200:
        log.Waring("request house url false", url)
        return None
    htree = etree.HTML(result.text)
    # Price block of the page.
    ls = htree.xpath(
        '//div[@class="sellDetailPage"]//div[@data-component="overviewIntro"]')
    if len(ls) > 0:
        get_total(house_info, ls[0])
    # Attribute lists of the page.
    ls = htree.xpath(
        '//div[@class="sellDetailPage"]//div[@class="m-content"]//div[@class="box-l"]'
    )
    if len(ls) > 0:
        get_info(house_info, ls[0])
    house_data[house_info["id"]] = house_info