def get_image_age_training_data():
    util = Utils()
    profile_df = util.read_data_to_dataframe("../data/Train/Profile/Profile.csv")
    profile_df.drop(profile_df.columns.difference(['userid', 'age']), axis=1, inplace=True)
    image_df = util.read_data_to_dataframe("../data/Train/Image/oxford.csv")
    image_df.rename(columns={'userId': 'userid'}, inplace=True)
    merged_df = pd.merge(image_df, profile_df, on='userid')
    merged_df = merged_df.filter(
        ['faceRectangle_width', 'faceRectangle_height', 'faceRectangle_left', 'faceRectangle_top',
         'pupilLeft_x', 'pupilLeft_y', 'pupilRight_x', 'pupilRight_y',
         'noseTip_x', 'noseTip_y', 'mouthLeft_x', 'mouthLeft_y', 'mouthRight_x', 'mouthRight_y',
         'eyebrowLeftOuter_x', 'eyebrowLeftOuter_y', 'eyebrowLeftInner_x', 'eyebrowLeftInner_y',
         'eyeLeftOuter_x', 'eyeLeftOuter_y', 'eyeLeftTop_x', 'eyeLeftTop_y',
         'eyeLeftBottom_x', 'eyeLeftBottom_y', 'eyeLeftInner_x', 'eyeLeftInner_y',
         'eyebrowRightInner_x', 'eyebrowRightInner_y', 'eyebrowRightOuter_x', 'eyebrowRightOuter_y',
         'eyeRightInner_x', 'eyeRightInner_y', 'eyeRightTop_x', 'eyeRightTop_y',
         'eyeRightBottom_x', 'eyeRightBottom_y', 'eyeRightOuter_x', 'eyeRightOuter_y',
         'noseRootLeft_x', 'noseRootLeft_y', 'noseRootRight_x', 'noseRootRight_y',
         'noseLeftAlarTop_x', 'noseLeftAlarTop_y', 'noseRightAlarTop_x', 'noseRightAlarTop_y',
         'noseLeftAlarOutTip_x', 'noseLeftAlarOutTip_y', 'noseRightAlarOutTip_x', 'noseRightAlarOutTip_y',
         'upperLipTop_x', 'upperLipTop_y', 'upperLipBottom_x', 'upperLipBottom_y',
         'underLipTop_x', 'underLipTop_y', 'underLipBottom_x', 'underLipBottom_y',
         'facialHair_mustache', 'facialHair_beard', 'facialHair_sideburns',
         'headPose_roll', 'headPose_yaw', 'headPose_pitch', 'age'],
        axis=1)
    # Bin continuous ages into the four target age groups.
    merged_df['age'] = pd.cut(merged_df['age'], [0, 25, 35, 50, 200],
                              labels=["xx-24", "25-34", "35-49", "50-xx"], right=False)
    return merged_df
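# A quick, self-contained illustration (hypothetical values) of the pd.cut
# binning used above: right=False makes each bin closed on the left, so an
# age of exactly 25 lands in "25-34" and 50 in "50-xx".
import pandas as pd

ages = pd.Series([18, 25, 34, 35, 49, 50, 80])
groups = pd.cut(ages, [0, 25, 35, 50, 200],
                labels=["xx-24", "25-34", "35-49", "50-xx"], right=False)
print(groups.tolist())  # ['xx-24', '25-34', '25-34', '35-49', '35-49', '50-xx', '50-xx']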
def gen_many_table_file(self, contactfile, nativefile, start, stop, step, hphobic):
    # contactfile is typically something like contacts.txt, generated by SMOG
    # or via the --gconmap option.
    U = Utils()
    U.make_dir('MD')
    U.make_dir('MD/table_files')
    X = conmaps()
    pairs = X.get_pairs_ext(contactfile)
    traj = md.load(nativefile)
    count = 0
    for pair in pairs:
        count = count + 1
        pair = np.reshape(pair, (1, 2))
        suffix = str(count)
        dist = md.compute_distances(traj, pair)
        rcm = dist[0][0] * 10  # nm -> angstroms
        self.gen_db_table_file('db', hphobic, rcm, start, stop, step, suffix)
def main(params=None):
    if params is None:
        params = parse_arguments()
    SEED = params.seed
    t0 = time.time()
    s_t = timer()
    dl = DataLoader(params)
    u = Utils(params, dl)
    timeDelta = int(time.time() - t0)
    print("PreCost:", datetime.timedelta(seconds=timeDelta))
    for repeat in range(params.repeat):
        print("\n\n\n{0} Repeat: {1} {0}".format('-' * 27, repeat))
        seed = SEED[repeat] if isinstance(SEED, list) else SEED
        set_seed(seed)
        print("\n\n\n{0} Seed: {1} {0}".format('-' * 27, seed))
        if params.mode == 0:
            # Start training
            trainer = Trainer(params, u)
            trainer.log_time['data_loading'] = timer() - s_t
            trainer.train()
            print(trainer.log_time)
            print("Total time taken (in seconds): {}".format(timer() - s_t))
            evaluator = Evaluator(params, u, dl)
            evaluator.evaluate()
        elif params.mode == 1:
            # Evaluate on the test set
            evaluator = Evaluator(params, u, dl)
            evaluator.evaluate()
        else:
            raise NotImplementedError("Unknown mode: {}".format(params.mode))
class ResultGenerator:
    utils = Utils()

    def generate_results(self, test_data_path="../data/Public_Test/",
                         path_to_results="../data/results"):
        """Run the test data against the model(s) and generate XML files."""
        profiles_path = os.path.join(test_data_path, "Profile", "Profile.csv")
        profiles = pd.read_csv(profiles_path)
        model_path = os.path.join(abs_path, "resources", "model.json")
        model = self.utils.read_json(model_path)
        df_results = generate_df_for_all_users(profiles, model)
        df_results = compute_gender(test_data_path, df_results)
        df_results = compute_age(test_data_path, df_results)
        df_results = compute_personality(test_data_path, df_results)
        df_results = compute_ext(test_data_path, df_results)
        df_results = compute_neu(test_data_path, df_results)
        xml_dictionary = self.generate_xml_from_profiles(df_results)
        self.store_individual_xmls_into_results_path(path_to_results, xml_dictionary)

    @staticmethod
    def generate_xml_from_profiles(data_frame):
        """Build one XML snippet per user. TODO: replace the hand-built
        string with a proper XML builder."""
        xml_dictionary = {}
        for index, row in data_frame.iterrows():
            xml = ('<user \n id = "{}" \n age_group = "{}" \n gender = "{}" '
                   '\n extrovert = "{}" \n neurotic = "{}" \n agreeable = "{}" '
                   '\n conscientious = "{}" \n open = "{}" />').format(
                row["userid"], row["age_group"], row["gender"], row["ext"],
                row["neu"], row["agr"], row["con"], row["ope"])
            xml_dictionary[row["userid"]] = xml
        return xml_dictionary

    def store_individual_xmls_into_results_path(self, path_to_results, xml_dictionary):
        """Write each dictionary value to a file named after its key."""
        self.utils.make_directory_if_not_exists(path_to_results)
        for user in xml_dictionary:
            self.utils.write_to_directory(
                os.path.join(path_to_results, user + ".xml"), xml_dictionary[user])
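# The docstring above flags the hand-built XML string as a TODO. A minimal
# sketch of the same element built with the standard-library
# xml.etree.ElementTree instead; attribute order and whitespace differ from
# the concatenated version, so this is an illustration, not a drop-in
# replacement (user_row_to_xml is a hypothetical helper name).
import xml.etree.ElementTree as ET

def user_row_to_xml(row):
    attrs = {
        'id': str(row['userid']),
        'age_group': str(row['age_group']),
        'gender': str(row['gender']),
        'extrovert': str(row['ext']),
        'neurotic': str(row['neu']),
        'agreeable': str(row['agr']),
        'conscientious': str(row['con']),
        'open': str(row['ope']),
    }
    return ET.tostring(ET.Element('user', attrs), encoding='unicode')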
def __init__(self):
    self.util = Utils()
    self.newcorpus = ''
    self.list_stopwords = []
    self.list_word = []
    self.token = []
    self.token_clean = []
    self.newcorpus_clean_token = ''
    self.newcorpus, self.list_word = self.util.get_corpus()
    self.stopwords = stopwords.words('spanish')
def main():
    params = parse_arguments()
    s_t = timer()
    dl = DataLoader(params)
    u = Utils(params, dl)
    evaluator = Evaluator(params, u)
    evaluator.log_time['data_loading'] = timer() - s_t
    evaluator.evaluate_CNN()
    print(evaluator.log_time)
    print("Total time taken (in seconds): {}".format(timer() - s_t))
def tree_request(self, endpoint):
    launch = requests.get(endpoint)
    if launch.status_code == 200:
        tree_launches = launch.json()
        for root in tree_launches:
            Utils().clear_screen(3)
            for item in root:
                print(item, end=':\t')
                print(root[item])
            Utils().clear_screen(1)
    else:
        print('Unexpected behaviour happened')
        print('Error Code: ', end=' ')
        print(launch.status_code)
def simple_request(self, endpoint):
    launch = requests.get(endpoint)
    if launch.status_code == 200:
        data = launch.json()  # parse once instead of on every iteration
        for item in data:
            print(item, end=':\t')
            print(data[item])
        Utils().clear_screen(1)
    else:
        print('Unexpected behaviour happened')
        print('Error Code: ', end=' ')
        print(launch.status_code)
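# A common alternative to checking status_code by hand is requests'
# raise_for_status(), which raises requests.HTTPError on 4xx/5xx responses.
# Minimal sketch (fetch_json is a hypothetical helper; the endpoint is one
# of those used elsewhere in this code):
import requests

def fetch_json(endpoint):
    response = requests.get(endpoint, timeout=10)
    response.raise_for_status()
    return response.json()

data = fetch_json('https://api.spacexdata.com/v3/launches/latest')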
def get_data(self, labels=['userid', 'ope', 'con', 'ext', 'agr', 'neu'],
             include_image=False):
    util = Utils()
    profile_df = util.read_data_to_dataframe("../data/Train/Profile/Profile.csv")
    profile_df = profile_df.filter(labels, axis=1)
    nrc_df = util.read_data_to_dataframe("../data/Train/Text/nrc.csv")
    liwc_df = util.read_data_to_dataframe("../data/Train/Text/liwc.csv")
    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)
    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    if include_image:
        image_df = self.read_image()
        merged_df = pd.merge(merged_df, image_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')
    merged_df.drop(['userid'], axis=1, inplace=True)
    return merged_df
def init(config_name=active_config_name()):
    global app, config, logger, utils, telebot
    config = Config()
    app = Flask(__name__)
    app.config.from_object(app_config[config_name])
    telebot = TeleBot(config.TOKEN)
    utils = Utils(telebot)
    # Create logger
    logger = config.create_logger(app)
    with app.app_context():
        register_extensions()
        register_blueprints()
        register_bot(config_name)
    return app
def load_model_and_run():
    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    ut = Utils(topology)

    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])
    model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
    MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    model.load_state_dict(torch.load(MODEL_WEIGHTS))

    WIDTH = 224
    HEIGHT = 224
    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()
    # Convert the PyTorch model to TensorRT, then cache and reload the
    # optimized weights.
    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True,
                                    max_workspace_size=1 << 25)
    OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    # Benchmark: average frames per second over 50 forward passes.
    t0 = time.time()
    torch.cuda.current_stream().synchronize()
    for i in range(50):
        y = model_trt(data)
    torch.cuda.current_stream().synchronize()
    t1 = time.time()
    print(50.0 / (t1 - t0))

    # ImageNet normalization constants for frame preprocessing.
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    # camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=15)
    camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=15)
    return ut, camera, model_trt
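# The mean/std tensors above suggest ImageNet-style normalization of camera
# frames before inference. A sketch of such a preprocess step, assuming BGR8
# numpy frames like those the Jetson camera classes yield (this mirrors the
# trt_pose demo code, but is an assumption here, not part of this repo):
import cv2
import PIL.Image
import torch
import torchvision.transforms as transforms

mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()

def preprocess(image):
    # BGR8 frame -> normalized float tensor with a batch dimension.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = PIL.Image.fromarray(image)
    image = transforms.functional.to_tensor(image).cuda()
    image.sub_(mean[:, None, None]).div_(std[:, None, None])
    return image[None, ...]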
def main():
    params = parse_arguments()
    s_t = timer()
    dl = DataLoader(params)
    u = Utils(params, dl)
    if params.mode == 0:
        # Start training
        trainer = Trainer(params, u)
        trainer.log_time['data_loading'] = timer() - s_t
        trainer.train()
        print(trainer.log_time)
        print("Total time taken (in seconds): {}".format(timer() - s_t))
    elif params.mode == 1:
        # Evaluate on the test set
        evaluator = Evaluator(params, u, dl)
        evaluator.evaluate()
    else:
        # Nothing implemented yet
        pass
def read_liwc(profiles_path="../data/Train/Profile/Profile.csv",
              liwc_path="../data/Train/Text/liwc.csv"):
    util = Utils()
    profile_df = util.read_data_to_dataframe(profiles_path)
    liwc_df = util.read_data_to_dataframe(liwc_path)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)
    merged_df = pd.merge(profile_df, liwc_df, on='userid', how='left')
    return merged_df.filter([
        'userid', 'WC', 'WPS', 'Sixltr', 'Dic', 'Numerals', 'funct', 'pronoun',
        'ppron', 'i', 'we', 'you', 'shehe', 'they', 'ipron', 'article', 'verb',
        'auxverb', 'past', 'present', 'future', 'adverb', 'preps', 'conj',
        'negate', 'quant', 'number', 'swear', 'social', 'family', 'friend',
        'humans', 'affect', 'posemo', 'negemo', 'anx', 'anger', 'sad',
        'cogmech', 'insight', 'cause', 'discrep', 'tentat', 'certain', 'inhib',
        'incl', 'excl', 'percept', 'see', 'hear', 'feel', 'bio', 'body',
        'health', 'sexual', 'ingest', 'relativ', 'motion', 'space', 'time',
        'work', 'achieve', 'leisure', 'home', 'money', 'relig', 'death',
        'assent', 'nonfl', 'filler', 'Period', 'Comma', 'Colon', 'SemiC',
        'QMark', 'Exclam', 'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP',
        'AllPct', 'age'
    ], axis=1)
def read_image(profiles_path="../data/Train/Profile/Profile.csv",
               image_path="../data/Train/Image/oxford.csv"):
    util = Utils()
    profile_df = util.read_data_to_dataframe(profiles_path)
    profile_df.drop(profile_df.columns.difference(['userid', 'age']), axis=1, inplace=True)
    image_df = util.read_data_to_dataframe(image_path)
    image_df.rename(columns={'userId': 'userid'}, inplace=True)
    merged_df = pd.merge(image_df, profile_df, on='userid')
    merged_df = merged_df.filter(
        ['userid', 'faceRectangle_width', 'faceRectangle_height', 'faceRectangle_left',
         'faceRectangle_top', 'pupilLeft_x', 'pupilLeft_y', 'pupilRight_x', 'pupilRight_y',
         'noseTip_x', 'noseTip_y', 'mouthLeft_x', 'mouthLeft_y', 'mouthRight_x', 'mouthRight_y',
         'eyebrowLeftOuter_x', 'eyebrowLeftOuter_y', 'eyebrowLeftInner_x', 'eyebrowLeftInner_y',
         'eyeLeftOuter_x', 'eyeLeftOuter_y', 'eyeLeftTop_x', 'eyeLeftTop_y',
         'eyeLeftBottom_x', 'eyeLeftBottom_y', 'eyeLeftInner_x', 'eyeLeftInner_y',
         'eyebrowRightInner_x', 'eyebrowRightInner_y', 'eyebrowRightOuter_x', 'eyebrowRightOuter_y',
         'eyeRightInner_x', 'eyeRightInner_y', 'eyeRightTop_x', 'eyeRightTop_y',
         'eyeRightBottom_x', 'eyeRightBottom_y', 'eyeRightOuter_x', 'eyeRightOuter_y',
         'noseRootLeft_x', 'noseRootLeft_y', 'noseRootRight_x', 'noseRootRight_y',
         'noseLeftAlarTop_x', 'noseLeftAlarTop_y', 'noseRightAlarTop_x', 'noseRightAlarTop_y',
         'noseLeftAlarOutTip_x', 'noseLeftAlarOutTip_y', 'noseRightAlarOutTip_x', 'noseRightAlarOutTip_y',
         'upperLipTop_x', 'upperLipTop_y', 'upperLipBottom_x', 'upperLipBottom_y',
         'underLipTop_x', 'underLipTop_y', 'underLipBottom_x', 'underLipBottom_y',
         'facialHair_mustache', 'facialHair_beard', 'facialHair_sideburns',
         'headPose_roll', 'headPose_yaw', 'headPose_pitch'],
        axis=1)
    return merged_df
class Main:
    utils = Utils()
    api = AcessAPI()
    while True:
        utils.print_menu()
        try:
            option = int(input('Enter an option: '))
            utils.clear_screen(3)
            if option == 1:
                api.simple_request('https://api.spacexdata.com/v3/launches/next')
            elif option == 2:
                api.simple_request('https://api.spacexdata.com/v3/launches/latest')
            elif option == 3:
                api.tree_request('https://api.spacexdata.com/v3/launches/upcoming')
            elif option == 4:
                api.tree_request('https://api.spacexdata.com/v3/launches/past')
            else:
                print('Invalid option. Please enter a valid number (1, 2, 3, 4)')
                utils.clear_screen(10)
        except ValueError:
            print('Error! Enter only the number of your option')
        ], axis=1)
        df["age"] = df_age
        data = df.to_numpy()
        X = data[:, :-1]
        y = data[:, -1]
        clf = LogisticRegression(C=0.004832930238571752, penalty='l2')
        clf.fit(X, y)
        with open("resources/LogisticRegressionAge_v2.sav", 'wb') as f:
            pickle.dump(clf, f)

    def fit_model_using_default_ica_rfe(self, clf, df):
        df_rfe = UTILS.apply_rfe(df, clf, 130, 1)
        print("model with 130 features rfe applied")
        self.run_classifier_for_accuracy(df_rfe, clf)


if __name__ == "__main__":
    UTILS = Utils()
    clf = LogisticRegression(C=0.004832930238571752, penalty='l2')
    CC = CombinedClassifier()
    df = CC.merge_images_piwc()
    # CC.fit_model_using_default_ica_rfe(clf, df)
    CC.predict_age_using_logistic_regression(df)
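# UTILS.apply_rfe is a project helper; a minimal sketch of what recursive
# feature elimination looks like with scikit-learn directly (apply_rfe_sketch
# is a hypothetical name; the split assumes the target is the last column,
# as elsewhere in this code):
import pandas as pd
from sklearn.feature_selection import RFE

def apply_rfe_sketch(df, clf, n_features, step):
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    selector = RFE(clf, n_features_to_select=n_features, step=step).fit(X, y)
    kept = list(X.columns[selector.support_])  # columns RFE ranks highest
    return df[kept + [df.columns[-1]]]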
params.cnn = '50, 100, 150, 350'
params.locnet = '10,10,10'
params.locnet2 = None
params.locnet3 = None
params.st = True
params.resume = False
params.dropout = 0.5
params.use_pickle = True
params.save_loc = "."
params.outfile = 'gtsrb_kaggle.csv'
params.train_pickle = params.save_loc + '/train.p'
params.extra_debug = False

from util import Utils
utils = Utils()

# Other options: 'BayesWRM', 'IFGSM', 'FGSM', 'PGD', 'Bayes'
method_set = ['ERM']
for method in method_set:
    if method == 'BayesWRM' or method == 'Bayes':
        model_list = []
        for ii in range(5):
            net = IDSIANetwork(params)
            net.load_state_dict(torch.load(
                os.path.join('advtrained_models', method + '_' + str(ii) + '.model')))
            net.cuda()
            model_list.append(net)
from driver import Driver
from login import Login
from followers import Followers
from home import Home
from logout import Logout
from util import Utils

driver = Driver().driver
util = Utils(driver)


def main():
    username = ""
    password = ""
    Login(driver).login(username, password)
    # Followers(driver).follow('jannatmirza07')
    # Home(driver, username).like_and_comment()
    # Logout(driver).logout()


try:
    main()
except Exception as error:
    util.handle_error(error)
def __init__(self, driver, username):
    self.driver = driver
    self.utils = Utils(driver)
    self.username = username
class ajkLoadDataAndInsert():
    city_list = []
    user_agents = []
    headers = {}
    utils = Utils()
    list_data = []
    ips = []
    ipIndex = 0
    ip = {}
    PROXYNAME = 'ipProxy'
    COLUMENAME = 'active_ajk_sec'

    def __init__(self):
        self.Logger = Logger('getAjkData')
        self.user_agents = Headers().user_agents
        self.headers = Headers().headers
        self.cfg = self.utils.pathToConfig()
        self.mysql = Mysql(self.cfg.get('DB', 'DBHOST'),
                           int(self.cfg.get('DB', 'DBPORT')),
                           self.cfg.get('DB', 'DBUSER'),
                           self.cfg.get('DB', 'DBPWD'), 3, 5)

    def load_detail_info_sec(self):
        self.Logger.Info('>>>>> Start crawling detail data <<<<<')
        self.ip = self.ips[0]
        for city in self.city_list:
            for page in range(0, int(city['ajk_sec_pages'])):
                city_list_url = city['ajk_sec_url'].replace(
                    '?from=navigation', 'p' + str(int(page) + 1) + '/#filtersort')
                self.Logger.Info('>>>>> Start crawling: ' + city['city_name'] +
                                 '|url:' + str(city_list_url) +
                                 '|ip:' + self.ip['ip'] + ' <<<<<')
                oneCityGetDown = True
                while oneCityGetDown:
                    try:
                        self.Logger.Info('>>>>> Using ip: ' + str(self.ip['ip']) + ' <<<<<')
                        proxies = {'http': self.ip['ip'], 'https': self.ip['ip']}
                        head = self.headers
                        head['user-agent'] = random.choice(self.user_agents)
                        r = requests.get(city_list_url, timeout=10,
                                         proxies=proxies, headers=head)
                        time.sleep(random.random() * 10)
                        soup = BeautifulSoup(r.text, "html.parser")
                        title = soup.find('title').get_text()
                        # '二手房' in the page title means the listing page loaded.
                        if '二手房' in title:
                            self.Logger.Info('>>>>> ip: ' + str(self.ip['ip']) +
                                             ' usable|' + title + ' <<<<<')
                            house_list = soup.find(attrs={'id': 'houselist-mod-new'}).find_all('li')
                            for l in house_list:
                                oneDetailGetDown = True
                                while oneDetailGetDown:
                                    house_title = l.find(attrs={'class': 'house-title'}).find('a').attrs['title'].strip()
                                    price = l.find(attrs={'class': 'price-det'}).get_text().strip()
                                    try:
                                        detail_url = l.find(attrs={'class': 'house-title'}).find('a').attrs['href']
                                        self.Logger.Info('>>>>> Start crawling: ' + house_title + '|' +
                                                         detail_url.split('view/')[1].split('?')[0] +
                                                         '|ip:' + self.ip['ip'] + '|data <<<<<')
                                        proxies = {'http': self.ip['ip'], 'https': self.ip['ip']}
                                        head['user-agent'] = random.choice(self.user_agents)
                                        r_detail = requests.get(detail_url.split('now_time')[0],
                                                                timeout=10, proxies=proxies, headers=head)
                                        time.sleep(random.random() * 20)
                                        soup_detail = BeautifulSoup(r_detail.text, "html.parser")
                                        title_detail = soup_detail.find('title').get_text()
                                        # '58安居客' without '访问验证' means a real detail
                                        # page, not a captcha challenge.
                                        if '58安居客' in title_detail and '访问验证' not in title_detail:
                                            try:
                                                self.Logger.Info('>>>>> Extracting required detail fields|' +
                                                                 title_detail + ' <<<<<')
                                                detail_dict = self.get_data(soup_detail)
                                                detail_dict['city_id'] = city['city_id']
                                                detail_dict['city_name'] = city['city_name']
                                                detail_dict['source'] = 'ajk'
                                                detail_dict['house_id'] = detail_url.split('view/')[1].split('?')[0]
                                                detail_dict['link_url'] = detail_url.split('?')[0]
                                                detail_dict['title'] = house_title
                                                detail_dict['price'] = self.utils.str_to_num(price)
                                                oneDetailGetDown = False
                                                self.insert_update_data(detail_dict)
                                            except BaseException as e:
                                                self.Logger.Info('>>>>> Failed to extract detail fields: ' +
                                                                 str(e) + ' <<<<<')
                                        elif '可能被删除' in title_detail:
                                            self.Logger.Info('>>>>> Link no longer valid|' +
                                                             title_detail + ' <<<<<')
                                            oneDetailGetDown = False
                                        else:
                                            self.Logger.Info('>>>>> ip for detail: ' + str(self.ip['ip']) +
                                                             ' unusable|' + str(title_detail) + ' <<<<<')
                                            result_ip = self.utils.get_active_ip(
                                                self.ips, self.ip, self.Logger,
                                                self.PROXYNAME, self.mysql)
                                            self.ip = result_ip['active_ip']
                                            self.ips = result_ip['ips']
                                    except BaseException as e:
                                        self.Logger.Info('>>>>> ip for detail: ' + str(self.ip['ip']) +
                                                         ' unusable, timed out|' + str(e) + ' <<<<<')
                                        result_ip = self.utils.get_active_ip(
                                            self.ips, self.ip, self.Logger,
                                            self.PROXYNAME, self.mysql)
                                        self.ip = result_ip['active_ip']
                                        self.ips = result_ip['ips']
                            oneCityGetDown = False
                            self.Logger.Info('>>>>> ========== city: ' + city['city_name'] +
                                             ' page ' + str(int(page) + 1) +
                                             ' crawled ========== <<<<<')
                        else:
def __init__(self, driver):
    self.driver = driver
    self.twitter_common = TwitterCommon(driver)
    self.utils = Utils(driver)
def __init__(self, driver):
    self.driver = driver
    self.utils = Utils(driver)
def __init__(self):
    utils = Utils()
    self.cfg = utils.pathToConfig()
    self.mysql = Mysql(self.cfg.get('DB', 'DBHOST'),
                       int(self.cfg.get('DB', 'DBPORT')),
                       self.cfg.get('DB', 'DBUSER'),
                       self.cfg.get('DB', 'DBPWD'), 3, 5)
    print('start get ip')
class ipProxy():
    user_agents = []
    headers = {}
    utils = Utils()

    def __init__(self):
        self.Loggers = Logger('ipProxy')
        self.user_agents = Headers().user_agents
        self.headers = Headers().headers
        self.cfg = self.utils.pathToConfig()
        self.mysql = Mysql(self.cfg.get('DB', 'DBHOST'),
                           int(self.cfg.get('DB', 'DBPORT')),
                           self.cfg.get('DB', 'DBUSER'),
                           self.cfg.get('DB', 'DBPWD'), 3, 5)

    def get_ip_from_xici(self):
        Loggers = Logger(special_log_file='getProxyXiCi')
        while True:
            try:
                avalibleIpsOneWeb = []
                startGetIpTime = time.time()
                startGetIpTimeFormat = time.strftime("%Y-%m-%d %H:%M:%S",
                                                     time.localtime(time.time()))
                title = 'xicidaili proxy'
                Loggers.Info('>>>>> ' + startGetIpTimeFormat + '|' + title +
                             '|start crawling ips <<<<<')
                url = 'http://www.xicidaili.com/nn/'
                head = self.headers
                head['user-agent'] = random.choice(self.user_agents)
                try:
                    Loggers.Info('>>>>> ' + title + '|requesting url ' + url + ' <<<<<')
                    r = requests.get(url, timeout=10, headers=head)
                    soup = BeautifulSoup(r.text, "html.parser")
                    cells = soup.find('table', attrs={'id': 'ip_list'}).find_all('td')
                    strText = ''
                    ips = []
                    for cell in cells:
                        content = cell.get_text().strip()
                        # An IPv4 address starts a new entry ...
                        if re.match(r'^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$', content):
                            strText = content
                        # ... and the following port (0-65535) completes it.
                        if re.match(r'^([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])$', content):
                            strText = strText + ':' + content
                            ips.append(strText)
                    endGetIpTime = time.time()
                    endGetIpTimeFormat = time.strftime("%Y-%m-%d %H:%M:%S",
                                                       time.localtime(time.time()))
                    Loggers.Info('>>>>> ' + endGetIpTimeFormat + '|' + title +
                                 '|finished crawling, got ' + str(len(ips)) + ' ips <<<<<')
                    Loggers.Info('>>>>> ' + endGetIpTimeFormat + '|' + title +
                                 '|start checking ip availability, crawling took ' +
                                 str(endGetIpTime - startGetIpTime) + ' <<<<<')
                    for ip in ips:
                        Loggers.Info('>>>>> checking ip: ' + str(ip) + ' <<<<<')
                        start = time.time()
                        if self.utils.checkIpForAJK(ip):
                            end = time.time()
                            avalibleIpsOneWeb.append({'source': 'xici', 'ip': ip,
                                                      'time': str(end - start)})
                            Loggers.Info('>>>>> ip: ' + str(ip) + ' usable <<<<<')
                        else:
                            Loggers.Info('>>>>> ip: ' + str(ip) + ' unusable <<<<<')
                    endCheckIpTime = time.time()
                    endCheckIpTimeFormat = time.strftime("%Y-%m-%d %H:%M:%S",
                                                         time.localtime(time.time()))
                    Loggers.Info('>>>>> ' + endCheckIpTimeFormat + '|' + title +
                                 '|finished checking ip availability, took ' +
                                 str(endCheckIpTime - endGetIpTime) + ' <<<<<')
                    Loggers.Info('>>>>> ' + title + '|success rate: ' +
                                 str(len(avalibleIpsOneWeb)) + '-' + str(len(ips)) + ' <<<<<')
                    Loggers.Info('>>>>> ' + endCheckIpTimeFormat + '|' + title +
                                 '|done, got ' + str(len(avalibleIpsOneWeb)) +
                                 ' usable ips, total time ' +
                                 str(endCheckIpTime - startGetIpTime) + ' <<<<<')
                    self.insert_data(Loggers, avalibleIpsOneWeb)
                except BaseException as e:
                    Loggers.Error('>>>>> requesting url failed ' + str(e) + ' <<<<<')
            except BaseException as e:
                Loggers.Error('>>>>> ip crawling loop failed ' + str(e) + ' <<<<<')
            time.sleep(10)
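# Sanity check for the corrected port pattern above: the original arm
# 6[0-4]\d{4} matched six digits, so 60000-64999 never matched while some
# six-digit strings did; 6[0-4]\d{3} is the intended 60000-64999 range.
import re

PORT_RE = re.compile(
    r'^([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])$')
for value, expected in [('80', True), ('8080', True), ('64999', True),
                        ('65535', True), ('65536', False), ('604999', False)]:
    assert bool(PORT_RE.match(value)) == expected, value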
        X = Personality.normalize(df)
        y = df[df.columns[-1:]]
        reg.fit(X, y)
        with open("resources/LinearRegression_ext_v2.sav", 'wb') as f:
            pickle.dump(reg, f)

    @staticmethod
    def normalize(df):
        X = df.iloc[:, 0:-1]  # independent columns
        # Log-transform, then min-max scale each feature to [0, 1].
        X = np.log(X + 1)
        X = (X - X.min()) / (X.max() - X.min())
        X.fillna(0, inplace=True)
        return X


if __name__ == '__main__':
    util = Utils()
    PERSONALITY = Personality()
    df = PERSONALITY.get_data(labels=['userid', 'ext'])
    df = df.filter(
        ['positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
         'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
         'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
         'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
         'Apostro', 'ext'], axis=1)
    reg = linear_model.LinearRegression()
    # Same normalization as Personality.normalize, inlined.
    X = df.iloc[:, 0:-1]  # independent columns
    X = np.log(X + 1)
    X = (X - X.min()) / (X.max() - X.min())
    X.fillna(0, inplace=True)
    y = df[df.columns[-1:]]
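# A toy run of the log + min-max normalization used above, showing why the
# fillna(0) is needed: a constant column has max == min, so the division
# produces NaN (values here are made up for illustration).
import numpy as np
import pandas as pd

toy = pd.DataFrame({'a': [0, 1, 3], 'b': [2, 2, 2]})
X = np.log(toy + 1)
X = (X - X.min()) / (X.max() - X.min())  # 'a' -> [0.0, 0.5, 1.0]; 'b' -> NaN
X.fillna(0, inplace=True)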