def runscheduler(client, username, num):
    """Run the pending task modules found under ./TaskList, one by one.

    The JSON config's ``taskNum`` (a string) records how many tasks are
    already done; tasks before that index are skipped, the rest are imported
    as ``TaskList.<name>`` modules and executed via their ``main`` entry
    point.  After each completed task the persisted count is advanced and a
    random delay is inserted.  Exits the process if ``taskNum`` is missing.
    """
    # Discover the task scripts.
    pending = []
    listdir('./TaskList', pending)
    logging.info('【任务调度】: 当前任务数量' + str(len(pending)))

    for position, task in enumerate(pending):
        # Re-read the config every iteration so the count written by
        # resetJson() below is picked up on the next pass.
        config = main.readJson()
        if 'taskNum' not in config[0]:
            logging.error('Json未配置taskNum,停止运行')
            sys.exit()
        taskNum = config[0]['taskNum']
        logging.info('【任务统计】: 已进行' + taskNum + '个任务')

        # Tasks below the recorded count were already done — skip them.
        if position < int(taskNum):
            logging.info('【任务跳过】: ' + task)
            continue

        logging.info('【任务分配】: ' + task)
        module = importlib.import_module('TaskList.' + task)
        module.main(client, username, num)
        # Persist progress, then wait a random 3–30 s before the next task.
        resetJson('./', './', int(taskNum) + 1)
        dtime = random.randint(3, 30)
        logging.info('【任务调度】: 延时进行' + str(dtime) + '秒')
        time.sleep(dtime)
def runscheduler(client, username, num):
    """Run every task module under TaskList/ that is not yet done today.

    Progress is persisted via resetJson() as a running count (``taskNum``,
    stored as a string in the JSON config) so a restart resumes where it left
    off; the count is reset to 0 when the date recorded by configdate() no
    longer matches today.  ``client``/``username``/``num`` are passed through
    unchanged to each task module's ``main``.  Exits the process if
    ``taskNum`` is missing from the config.
    """
    # Collect and sort the task scripts so they always run in a stable order.
    tasklist = []
    listdir(resource_path('TaskList'), tasklist)
    tasklist.sort()
    logging.info('【任务调度】: 当前任务数量' + str(len(tasklist)))

    user = main.readJson()
    if 'taskNum' in user[0]:
        taskNum = user[0]['taskNum']
    else:
        logging.error('Json未配置taskNum,停止运行')
        sys.exit()

    # A new day invalidates yesterday's progress: reset the persisted count.
    if configdate() != str(datetime.datetime.now().month) + str(
            datetime.datetime.now().day):
        resetJson('./', './', 0)
        # BUG FIX: keep the in-memory count in sync with the file we just
        # reset; the original kept the stale pre-reset value and skipped
        # tasks that should have re-run after the date change.
        taskNum = '0'
        logging.info('【任务调度】: 日期变更重置任务')

    logging.info('【任务统计】: 已进行' + taskNum + '个任务')

    forNum = 0
    for task in tasklist:
        if forNum >= int(taskNum):
            logging.info('【任务分配】: ' + task)
            i = importlib.import_module('TaskList.' + task)
            # BUG FIX: the original used target=i.main(client, username, num),
            # which *called* the task in the current thread and handed its
            # return value (None) to Thread; pass the callable and its args
            # so the worker thread actually runs the task.
            thread = threading.Thread(target=i.main,
                                      args=(client, username, num))
            thread.start()
            time.sleep(1)
            thread.join()
            # BUG FIX: persist the number of tasks completed so far
            # (forNum + 1); the original wrote int(taskNum) + 1 on every
            # iteration, so finishing several tasks only advanced the saved
            # count by one.
            resetJson('./', './', forNum + 1)
            forNum = forNum + 1
            if forNum < len(tasklist):
                # Random delay between tasks; none after the last one.
                dtime = random.randint(3, 10)
                logging.info('【任务调度】: 延时进行' + str(dtime) + '秒')
                time.sleep(dtime)
            else:
                print('【任务结束】: 正在尝试发送通知')
        else:
            logging.info('【任务跳过】: ' + task)
            forNum = forNum + 1
if __name__ == '__main__': #from main import parseDateType #jsonfileList = parseDateType('onlynews_sort',datetype='10') #jsonfileList.extend(parseDateType('onlynews_sort',datetype='11')) #jsonfileList.sort() #textfileList = parseDateType('onlynews_select_sort',datetype='10') #textfileList.extend(parseDateType('onlynews_select_sort',datetype='11')) #textfileList.sort() jsonfileList = ['onlynews_sort/ptt_news_2015-10-19.json'] textfileList = ['onlynews_select_sort/ptt_news_2015-10-19.txt'] idx = 0 td_first = TopicDetection() first_corpus = True last_datetime = '' for jsonfile, textfile in zip(jsonfileList, textfileList): dataList = readJson(jsonfile) textList = readText(textfile) print textfile head, tail = os.path.split(textfile) tail = tail.split('.')[0] matfile = "result/" + tail + ".mat" labelfile = "result/" + tail + "_label.txt" gendictfile = "gensim_tmp/" + tail + ".dict" #on-the-fly finalpredfile = "result/" + tail + "_mean.pred" finaltopicfile = "result/" + tail + "_mean.topic" for idx in range(len(textList)): newtext = [textList[idx]] if first_corpus and idx == 0: td_first.constructTermVector(newtext, gendictfile,
if __name__ == "__main__":
    # Alternative driver (kept for reference): build the day-by-day file
    # lists from the dated directories instead of hard-coding one day.
    # from main import parseDateType
    # jsonfileList = parseDateType('onlynews_sort',datetype='10')
    # jsonfileList.extend(parseDateType('onlynews_sort',datetype='11'))
    # jsonfileList.sort()
    # textfileList = parseDateType('onlynews_select_sort',datetype='10')
    # textfileList.extend(parseDateType('onlynews_select_sort',datetype='11'))
    # textfileList.sort()

    # One JSON metadata file and one preprocessed-text file per day, paired
    # positionally by zip() below.
    jsonfileList = ["onlynews_sort/ptt_news_2015-10-19.json"]
    textfileList = ["onlynews_select_sort/ptt_news_2015-10-19.txt"]
    idx = 0
    td_first = TopicDetection()
    first_corpus = True  # True until the very first document seeds the model
    last_datetime = ""
    for jsonfile, textfile in zip(jsonfileList, textfileList):
        dataList = readJson(jsonfile)  # per-article records (incl. "datetime")
        textList = readText(textfile)  # one preprocessed article per entry
        print textfile  # Python 2 print statement
        # Derive all output paths from the text file's base name.
        head, tail = os.path.split(textfile)
        tail = tail.split(".")[0]
        matfile = "result/" + tail + ".mat"
        labelfile = "result/" + tail + "_label.txt"
        gendictfile = "gensim_tmp/" + tail + ".dict"  # on-the-fly gensim dictionary
        finalpredfile = "result/" + tail + "_mean.pred"
        finaltopicfile = "result/" + tail + "_mean.topic"
        # Feed the articles one at a time (online processing).
        for idx in range(len(textList)):
            newtext = [textList[idx]]
            if first_corpus and idx == 0:
                # Very first document overall: build the term-vector matrix
                # from scratch rather than updating an existing one.
                td_first.constructTermVector(newtext, gendictfile,
                                             outfile=matfile, update=False)
                last_datetime = datetime.strptime(dataList[idx]["datetime"],
                                                  "%Y-%m-%d %H:%M:%S")
                # NOTE(review): the snippet appears to end here — the
                # handling of subsequent documents is outside this view.
# NOTE(review): this first section references names (cluster_dict,
# outtextfile, outjsonfile, textList, jsonList) that are not defined here —
# it looks like the tail of a function whose header is outside this view.
max_length = -1
# Find the cluster label with the most titles.
for label, titleList in cluster_dict.iteritems():  # Python 2 dict iteration
    if len(titleList) > max_length:
        max_label = label
        max_length = len(titleList)
# Dump the largest cluster: raw text lines and the matching JSON records.
outtext = open(outtextfile, "w")
outjson = open(outjsonfile, "w")
for (title, idx) in cluster_dict[max_label]:
    outtext.write("%s" % (textList[idx]))
    # Python 2: encode to UTF-8 bytes before writing.
    outjson.write(json.dumps(jsonList[idx],
                             ensure_ascii=False).encode("utf8") + "\n")
outtext.close()
outjson.close()


if __name__ == "__main__":
    # Usage: cluster_analysis.py <textfile> <jsonfile> <outfile>
    if len(sys.argv) != 4:
        print "usage: python cluster_analysis.py textfile jsonfile outfile"
        print "(e.g.) python cluster_analysis.py vectordata/total_10.txt vectordata/total_10.json cluster_title_10.txt"
        exit()
    # Cluster id per line, aligned positionally with the lines of textfile.
    clusterfile = "cluster_label.txt"
    textfile = sys.argv[1]
    jsonfile = sys.argv[2]
    outfile = sys.argv[3]
    labelList = readCluster(clusterfile)
    textList = readText(textfile)
    jsonList = readJson(jsonfile)
    outputCluster(textList, jsonList, labelList, outfile)
    # extractCluster(textList, jsonList, labelList, 'vectordata/new.txt', 'vectordata/new.json')
# NOTE(review): truncated tail of a function — the enclosing loop/def and the
# definitions of titleList, max_length, cluster_dict etc. are outside this
# view; the if-statement below was originally inside a loop over clusters.
if len(titleList) > max_length:
    max_label = label
    max_length = len(titleList)
# Write the largest cluster out: raw text plus the matching JSON records.
outtext = open(outtextfile, 'w')
outjson = open(outjsonfile, 'w')
for (title, idx) in cluster_dict[max_label]:
    outtext.write('%s' % (textList[idx]))
    # Python 2: encode to UTF-8 bytes before writing.
    outjson.write(
        json.dumps(jsonList[idx], ensure_ascii=False).encode('utf8') + "\n")
outtext.close()
outjson.close()


if __name__ == '__main__':
    # Usage: cluster_analysis.py <textfile> <jsonfile> <outfile>
    if len(sys.argv) != 4:
        print 'usage: python cluster_analysis.py textfile jsonfile outfile'
        print '(e.g.) python cluster_analysis.py vectordata/total_10.txt vectordata/total_10.json cluster_title_10.txt'
        exit()
    # Cluster id per line, aligned positionally with the lines of textfile.
    clusterfile = 'cluster_label.txt'
    textfile = sys.argv[1]
    jsonfile = sys.argv[2]
    outfile = sys.argv[3]
    labelList = readCluster(clusterfile)
    textList = readText(textfile)
    jsonList = readJson(jsonfile)
    outputCluster(textList, jsonList, labelList, outfile)
    #extractCluster(textList, jsonList, labelList, 'vectordata/new.txt', 'vectordata/new.json')