# NOTE(review): whitespace-mangled fragment of a Selenium room-booking loop; the
# original newlines were lost and the fragment begins mid-`if/else` (the opening
# `if` head is not visible in this chunk), so it cannot be safely reformatted here.
# Visible logic, in order: pick a room via chooseKnowRoom / chooseRandomroom for
# `startTime`; if a room element `r` was found, print its title, write its last 4
# title characters into the spreadsheet via excelOperator.fillCell, and hand off to
# process(...) (login flag cleared after first use); otherwise report the date as
# missing. Then reload the LibCal reservation page, advance `startDate` (wrapping
# past `lastDateThisMonth` back to 1 — presumably month rollover; confirm), reset
# `rooms`, and bump the loop counters `index` / `indexChosen2`.
# TODO(review): recover the original file's line breaks and the missing `if` head
# before editing this logic; `!= None` comparisons should become `is not None`.
print(r.get_attribute('title')) else: if knownRooms[indexChosen2] != "-1": r = chooseKnowRoom(rooms[knownRoomsMap[knownRooms[indexChosen2]]], startTime) print(r.get_attribute('title')) else: r = chooseRandomroom(rooms, startTime) if r != None: print(r.get_attribute('title') + " Random") if r != None: length = len(r.get_attribute('title')) excelOperator.fillCell(index - startPoint + 1, startTime, r.get_attribute('title')[length - 4:length]) process(r, driver, login, trueUserName, truePassWord) login = False else: print(startDate, "not exist") ################################################################# driver.get("https://ufl.libcal.com/reserve/studyMSL") time.sleep(2) startDate += 1 if startDate == lastDateThisMonth + 1: startDate = 1 rooms = [] index += 1 indexChosen2 += 1 #test StartDate print(startDate)
# Entry script for the "hound" tool (originally Python 2 — `print` statement
# syntax in the source). Reconstructed from a whitespace-mangled one-liner in
# which everything after the first `#` was dead comment text; line structure and
# indentation are best-effort — TODO(review): confirm against the original file.
# Single-argument print(...) calls are used so the same code runs unchanged
# under Python 2 while also parsing under Python 3.
picture = random.randint(1, 4)  # random 1..4: selects which banner (a1..a4) to show
tsk = []                        # worker threads to join before exit
crawler_progress = []           # crawler threads to join before exit

if thread > 500:
    # `thread` is a CLI option parsed elsewhere in this file; cap enforced here.
    print('\033[1;31;1m Command parse error !!! \033[0m')
    exit()

if __name__ == '__main__':
    hound_db = DB()
    # NOTE(review): this rebinds the name `blast` from the class to an instance,
    # shadowing the class for the rest of the script — kept as in the original.
    blast = blast()
    if Dictionaries:
        # Bulk-import dictionary entries.
        function.process(hound_db.Dictionaries, Dictionaries)
    elif imurl and len(imurl) == 2:
        # Import a domain (name, value pair).
        function.im_url(imurl[0], imurl[1])
    elif url:
        # Show one of four banners at random.
        if picture == 1:
            function.a1()
        elif picture == 2:
            function.a2()
        elif picture == 3:
            function.a3()
        elif picture == 4:
            function.a4()
        # Fetch all dictionary rows, then print the status line.
        # NOTE(review): placement under `elif url:` is reconstructed — confirm.
        lis = hound_db.query_all("select lis from lis")
        print("\033[1;35;1m Dictionary--> %i Tools--> hound version--> 1.7 \033[0m \n" % (len(lis)))
# Feature-vector build/shuffle driver for a text-classification experiment.
# Reconstructed from a whitespace-mangled one-liner (newlines lost; everything
# after the first `#` on the physical line was dead comment text). Import list
# and statement order preserved from the original; indentation is conventional.

# sklearn modules
from sklearn.utils import shuffle  # random permutation of samples
from sklearn import preprocessing  # normalize sparse matrix
from sklearn.feature_extraction.text import CountVectorizer  # build a CSR matrix from data
from sklearn.linear_model import LogisticRegression  # logistic classifier
from sklearn import svm  # SVM classifier
from sklearn.naive_bayes import MultinomialNB  # naive Bayes
from sklearn.ensemble import BaggingClassifier

# utilities
import numpy as np
import function  # project-local module providing process()

if __name__ == "__main__":
    # Ask whether to rebuild the cached feature vectors; the answer is passed
    # straight through to function.process (semantics defined elsewhere).
    responce = input("Do you want to repopulate feature vectors?[Y/N]:")
    big_feature_list, label = function.process(responce)
    # Shuffle data and labels together with a fixed seed for reproducibility.
    big_feature_list, label = shuffle(big_feature_list, label, random_state=5)
    label = label.tolist()  # labels come back as an array; downstream code wants a list
    # Build stopword list — not used (kept as commented-out reference code):
    # for document in big_feature_list.tolist():
    #     corpus += document.split()
    # fdist = FreqDist(corpus)
    # print(fdist.most_common(200))
    # s = open('stopwords.txt', 'w')
    # for pair in fdist.most_common(500):
    #     s.write(str(pair[0]) + '\n')
    # s.close()