Example #1
def runscheduler(client, username, num):
    # Collect the available task modules
    tasklist = []
    listdir('./TaskList', tasklist)
    logging.info('【任务调度】: 当前任务数量' + str(len(tasklist)))
    forNum = 0
    for task in tasklist:
        user = main.readJson()
        if ('taskNum' in user[0]):
            taskNum = user[0]['taskNum']
        else:
            logging.error('Json未配置taskNum,停止运行')
            sys.exit()
        logging.info('【任务统计】: 已进行' + str(taskNum) + '个任务')
        if forNum >= int(taskNum):
            logging.info('【任务分配】: ' + task)
            i = importlib.import_module('TaskList.' + task)
            i.main(client, username, num)
            resetJson('./', './', int(taskNum) + 1)
            forNum = forNum + 1
            dtime = random.randint(3, 30)
            logging.info('【任务调度】: 延时进行' + str(dtime) + '秒')
            time.sleep(dtime)
        else:
            logging.info('【任务跳过】: ' + task)
            forNum = forNum + 1
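The scheduler leans on project helpers that the snippet does not show: `listdir` fills the passed-in list with task module names, `main.readJson` returns the user configuration (including `taskNum`, the count of tasks already completed today), and `resetJson` writes the updated counter back. A minimal sketch of what those helpers might look like follows; the `config.json` file name, the JSON layout, and the function bodies are assumptions for illustration, not the project's actual code.

import json
import os

def listdir(path, namelist):
    # Assumed behaviour: collect importable task module names (without the
    # .py suffix) from the task directory into the caller-supplied list.
    for name in os.listdir(path):
        if name.endswith('.py') and name != '__init__.py':
            namelist.append(name[:-3])

def readJson(configfile='./config.json'):
    # Assumed behaviour: return the list of user objects from the JSON config.
    with open(configfile, encoding='utf-8') as f:
        return json.load(f)

def resetJson(srcdir, dstdir, taskNum, configfile='./config.json'):
    # Assumed behaviour: persist the updated task counter so that a restarted
    # run skips the tasks that already completed today.
    users = readJson(configfile)
    users[0]['taskNum'] = str(taskNum)
    with open(configfile, 'w', encoding='utf-8') as f:
        json.dump(users, f, ensure_ascii=False, indent=2)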
Example #2
def runscheduler(client, username, num):
    # Collect the available task modules
    tasklist = []
    listdir(resource_path('TaskList'), tasklist)
    # Sort the task list so tasks run in a stable order
    tasklist.sort()
    logging.info('【任务调度】: 当前任务数量' + str(len(tasklist)))
    user = main.readJson()
    if ('taskNum' in user[0]):
        taskNum = user[0]['taskNum']
    else:
        logging.error('Json未配置taskNum,停止运行')
        sys.exit()
    # Reset the task counter when the date has changed
    if configdate() != str(datetime.datetime.now().month) + str(
            datetime.datetime.now().day):
        resetJson('./', './', 0)
        logging.info('【任务调度】: 日期变更重置任务')
    logging.info('【任务统计】: 已进行' + str(taskNum) + '个任务')
    forNum = 0
    for task in tasklist:
        if forNum >= int(taskNum):
            logging.info('【任务分配】: ' + task)
            i = importlib.import_module('TaskList.' + task)
            # Run the task in a worker thread: pass the callable and its
            # arguments separately instead of calling i.main() here
            thread = threading.Thread(target=i.main, args=(client, username, num))
            #i.main(client,username,num)
            thread.start()
            time.sleep(1)
            thread.join()
            resetJson('./', './', int(taskNum) + 1)
            forNum = forNum + 1
            if forNum < len(tasklist):
                dtime = random.randint(3, 10)
                logging.info('【任务调度】: 延时进行' + str(dtime) + '秒')
                time.sleep(dtime)
            else:
                print('【任务结束】: 正在尝试发送通知')
        else:
            logging.info('【任务跳过】: ' + task)
            forNum = forNum + 1
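One detail worth calling out: `threading.Thread` must receive the callable through `target=` and its arguments through `args=`; writing `target=i.main(client, username, num)`, as the original snippet did, runs the task synchronously on the scheduler thread and hands the new thread only the return value. A self-contained sketch of the intended dispatch pattern is below; the `TaskList.demo_task` module name is hypothetical.

import importlib
import threading

def dispatch(module_name, client, username, num):
    # Import the task module lazily, then run its main() on a worker thread.
    task = importlib.import_module(module_name)  # e.g. 'TaskList.demo_task' (hypothetical)
    worker = threading.Thread(target=task.main,              # the callable itself...
                              args=(client, username, num))  # ...and its arguments, separately
    worker.start()
    worker.join()  # the scheduler above waits for each task before the random delay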
Example #3
if __name__ == '__main__':
    #from main import parseDateType
    #jsonfileList = parseDateType('onlynews_sort',datetype='10')
    #jsonfileList.extend(parseDateType('onlynews_sort',datetype='11'))
    #jsonfileList.sort()
    #textfileList = parseDateType('onlynews_select_sort',datetype='10')
    #textfileList.extend(parseDateType('onlynews_select_sort',datetype='11'))
    #textfileList.sort()
    jsonfileList = ['onlynews_sort/ptt_news_2015-10-19.json']
    textfileList = ['onlynews_select_sort/ptt_news_2015-10-19.txt']
    idx = 0
    td_first = TopicDetection()
    first_corpus = True
    last_datetime = ''
    for jsonfile, textfile in zip(jsonfileList, textfileList):
        dataList = readJson(jsonfile)
        textList = readText(textfile)
        print textfile
        head, tail = os.path.split(textfile)
        tail = tail.split('.')[0]
        matfile = "result/" + tail + ".mat"
        labelfile = "result/" + tail + "_label.txt"
        gendictfile = "gensim_tmp/" + tail + ".dict"
        #on-the-fly
        finalpredfile = "result/" + tail + "_mean.pred"
        finaltopicfile = "result/" + tail + "_mean.topic"
        for idx in range(len(textList)):
            newtext = [textList[idx]]
            if first_corpus and idx == 0:
                td_first.constructTermVector(newtext,
                                             gendictfile,
                                             outfile=matfile,
                                             update=False)
Example #4
if __name__ == "__main__":
    # from main import parseDateType
    # jsonfileList = parseDateType('onlynews_sort',datetype='10')
    # jsonfileList.extend(parseDateType('onlynews_sort',datetype='11'))
    # jsonfileList.sort()
    # textfileList = parseDateType('onlynews_select_sort',datetype='10')
    # textfileList.extend(parseDateType('onlynews_select_sort',datetype='11'))
    # textfileList.sort()
    jsonfileList = ["onlynews_sort/ptt_news_2015-10-19.json"]
    textfileList = ["onlynews_select_sort/ptt_news_2015-10-19.txt"]
    idx = 0
    td_first = TopicDetection()
    first_corpus = True
    last_datetime = ""
    for jsonfile, textfile in zip(jsonfileList, textfileList):
        dataList = readJson(jsonfile)
        textList = readText(textfile)
        print textfile
        head, tail = os.path.split(textfile)
        tail = tail.split(".")[0]
        matfile = "result/" + tail + ".mat"
        labelfile = "result/" + tail + "_label.txt"
        gendictfile = "gensim_tmp/" + tail + ".dict"
        # on-the-fly
        finalpredfile = "result/" + tail + "_mean.pred"
        finaltopicfile = "result/" + tail + "_mean.topic"
        for idx in range(len(textList)):
            newtext = [textList[idx]]
            if first_corpus and idx == 0:
                td_first.constructTermVector(newtext, gendictfile, outfile=matfile, update=False)
                last_datetime = datetime.strptime(dataList[idx]["datetime"], "%Y-%m-%d %H:%M:%S")
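`constructTermVector` itself is not part of either snippet; the `gensim_tmp/... .dict` path and the `update` flag suggest it builds an incremental bag-of-words representation with gensim. A minimal sketch of that idea, under that assumption and with the `outfile` handling omitted:

from gensim import corpora

def construct_term_vector(texts, dictfile, update=False):
    # Hypothetical sketch: build (or extend) a gensim dictionary from the
    # tokenised documents and return their bag-of-words vectors.
    tokenised = [doc.split() for doc in texts]
    if update:
        dictionary = corpora.Dictionary.load(dictfile)  # reuse the saved vocabulary
        dictionary.add_documents(tokenised)             # extend it with the new documents
    else:
        dictionary = corpora.Dictionary(tokenised)      # start a fresh vocabulary
    dictionary.save(dictfile)
    return [dictionary.doc2bow(doc) for doc in tokenised]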
Example #5
    max_length = -1
    for label, titleList in cluster_dict.iteritems():
        if len(titleList) > max_length:
            max_label = label
            max_length = len(titleList)
    outtext = open(outtextfile, "w")
    outjson = open(outjsonfile, "w")
    for (title, idx) in cluster_dict[max_label]:
        outtext.write("%s" % (textList[idx]))
        outjson.write(json.dumps(jsonList[idx], ensure_ascii=False).encode("utf8") + "\n")
    outtext.close()
    outjson.close()


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print "usage: python cluster_analysis.py textfile jsonfile outfile"
        print "(e.g.) python cluster_analysis.py vectordata/total_10.txt vectordata/total_10.json cluster_title_10.txt"
        exit()

    clusterfile = "cluster_label.txt"
    textfile = sys.argv[1]
    jsonfile = sys.argv[2]
    outfile = sys.argv[3]

    labelList = readCluster(clusterfile)
    textList = readText(textfile)
    jsonList = readJson(jsonfile)
    outputCluster(textList, jsonList, labelList, outfile)
    # extractCluster(textList, jsonList, labelList, 'vectordata/new.txt', 'vectordata/new.json')
Example #6
        if len(titleList) > max_length:
            max_label = label
            max_length = len(titleList)
    outtext = open(outtextfile, 'w')
    outjson = open(outjsonfile, 'w')
    for (title, idx) in cluster_dict[max_label]:
        outtext.write('%s' % (textList[idx]))
        outjson.write(
            json.dumps(jsonList[idx], ensure_ascii=False).encode('utf8') +
            "\n")
    outtext.close()
    outjson.close()


if __name__ == '__main__':
    if len(sys.argv) != 4:
        print 'usage: python cluster_analysis.py textfile jsonfile outfile'
        print '(e.g.) python cluster_analysis.py vectordata/total_10.txt vectordata/total_10.json cluster_title_10.txt'
        exit()

    clusterfile = 'cluster_label.txt'
    textfile = sys.argv[1]
    jsonfile = sys.argv[2]
    outfile = sys.argv[3]

    labelList = readCluster(clusterfile)
    textList = readText(textfile)
    jsonList = readJson(jsonfile)
    outputCluster(textList, jsonList, labelList, outfile)
    #extractCluster(textList, jsonList, labelList, 'vectordata/new.txt', 'vectordata/new.json')
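`readCluster`, `readText`, and `readJson` are not shown in either snippet; from the way `labelList`, `textList`, and `jsonList` are indexed in parallel, each appears to read one record per line. A minimal sketch under that assumption (in Python 3 syntax):

import json

def readCluster(path):
    # Assumed format: one integer cluster label per line, aligned with the text file.
    with open(path) as f:
        return [int(line.strip()) for line in f]

def readText(path):
    # Assumed format: one document (e.g. a news title) per line.
    with open(path) as f:
        return f.readlines()

def readJson(path):
    # Assumed format: one JSON object per line (JSON Lines), in the same order as the text file.
    with open(path) as f:
        return [json.loads(line) for line in f]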