Python Utils.Utils Beispiele, util.Utils.Utils Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: image_classifier.py Projekt: RyanMokarian/Facebook_Users_Profiling

 def get_image_age_training_data():
     """Build the face-feature training frame labelled with age groups.

     Reads the profile and Oxford image CSVs through ``Utils``, joins them on
     ``userid`` and keeps the facial-landmark feature columns plus ``age``.
     ``age`` is then replaced by a categorical bucket.

     Returns:
         DataFrame with the feature columns followed by ``age``, where ``age``
         holds one of ``"xx-24"``, ``"25-34"``, ``"35-49"``, ``"50-xx"``
         (NaN for ages outside ``[0, 200)``).
     """
     feature_columns = [
         'faceRectangle_width', 'faceRectangle_height', 'faceRectangle_left', 'faceRectangle_top',
         'pupilLeft_x', 'pupilLeft_y', 'pupilRight_x', 'pupilRight_y', 'noseTip_x', 'noseTip_y', 'mouthLeft_x',
         'mouthLeft_y', 'mouthRight_x', 'mouthRight_y', 'eyebrowLeftOuter_x', 'eyebrowLeftOuter_y',
         'eyebrowLeftInner_x', 'eyebrowLeftInner_y', 'eyeLeftOuter_x', 'eyeLeftOuter_y', 'eyeLeftTop_x',
         'eyeLeftTop_y', 'eyeLeftBottom_x', 'eyeLeftBottom_y', 'eyeLeftInner_x', 'eyeLeftInner_y',
         'eyebrowRightInner_x', 'eyebrowRightInner_y', 'eyebrowRightOuter_x', 'eyebrowRightOuter_y',
         'eyeRightInner_x', 'eyeRightInner_y', 'eyeRightTop_x', 'eyeRightTop_y', 'eyeRightBottom_x',
         'eyeRightBottom_y', 'eyeRightOuter_x', 'eyeRightOuter_y', 'noseRootLeft_x', 'noseRootLeft_y',
         'noseRootRight_x', 'noseRootRight_y', 'noseLeftAlarTop_x', 'noseLeftAlarTop_y', 'noseRightAlarTop_x',
         'noseRightAlarTop_y', 'noseLeftAlarOutTip_x', 'noseLeftAlarOutTip_y', 'noseRightAlarOutTip_x',
         'noseRightAlarOutTip_y', 'upperLipTop_x', 'upperLipTop_y', 'upperLipBottom_x', 'upperLipBottom_y',
         'underLipTop_x', 'underLipTop_y', 'underLipBottom_x', 'underLipBottom_y', 'facialHair_mustache',
         'facialHair_beard', 'facialHair_sideburns', 'headPose_roll', 'headPose_yaw', 'headPose_pitch',
         'age',
     ]

     util = Utils()
     profile_df = util.read_data_to_dataframe("../data/Train/Profile/Profile.csv")
     # Keep only the join key and the label.  ``axis`` must be given by
     # keyword: pandas 2.x no longer accepts it as a positional argument.
     profile_df.drop(profile_df.columns.difference(['userid', 'age']), axis=1, inplace=True)

     image_df = util.read_data_to_dataframe("../data/Train/Image/oxford.csv")
     # The image file spells the key 'userId'; align it with the profile file.
     image_df.rename(columns={'userId': 'userid'}, inplace=True)

     merged_df = pd.merge(image_df, profile_df, on='userid')
     merged_df = merged_df.filter(feature_columns, axis=1)
     # right=False makes the bins left-closed: [0, 25), [25, 35), [35, 50), [50, 200).
     merged_df['age'] = pd.cut(merged_df['age'], [0, 25, 35, 50, 200],
                               labels=["xx-24", "25-34", "35-49", "50-xx"],
                               right=False)
     return merged_df

Beispiel #2

0

Datei anzeigen

 def gen_many_table_file(self, contactfile, nativefile, start, stop, step,
                         hphobic):
     """Generate one 'db' table file per contact pair.

     Creates the ``MD`` and ``MD/table_files`` directories, reads the contact
     pairs from ``contactfile`` (typically something like contacts.txt
     generated from SMOG or the --gconmap option) and, for every pair,
     measures its distance in ``nativefile`` and forwards it to
     ``self.gen_db_table_file``.

     Args:
         contactfile: Path handed to ``conmaps().get_pairs_ext``.
         nativefile: Structure file handed to ``md.load``.
         start, stop, step: Passed through unchanged to ``gen_db_table_file``.
         hphobic: Truthy to request the hydrophobic variant (passed on as
             ``True``/``False``).
     """
     dir_helper = Utils()
     dir_helper.make_dir('MD')
     dir_helper.make_dir('MD/table_files')

     contact_pairs = conmaps().get_pairs_ext(contactfile)
     native = md.load(nativefile)

     # Table files are numbered from 1 in the order the pairs are listed.
     for pair_number, raw_pair in enumerate(contact_pairs, 1):
         atom_pair = np.reshape(raw_pair, (1, 2))
         distances = md.compute_distances(native, atom_pair)
         # Scaled by 10; the original comment labels the result as angstroms
         # (presumably converting from nanometres - TODO confirm).
         rcm = distances[0][0] * 10
         self.gen_db_table_file('db', bool(hphobic), rcm, start, stop, step,
                                str(pair_number))

Beispiel #3

0

Datei anzeigen

def main(params=None):
    """Run training and/or evaluation ``params.repeat`` times.

    Args:
        params: Parsed run configuration.  When ``None`` it is obtained from
            ``parse_arguments()``.  ``params.seed`` may be a single seed or a
            list with one seed per repeat.

    Raises:
        NotImplementedError: If ``params.mode`` is neither 0 (train, then
            evaluate) nor 1 (evaluate only).
    """
    if params is None:
        params = parse_arguments()
    SEED = params.seed
    t0 = time.time()
    s_t = timer()
    dl = DataLoader(params)

    u = Utils(params, dl)
    timeDelta = int(time.time() - t0)
    print("PreCost:", datetime.timedelta(seconds=timeDelta))
    for repeat in range(params.repeat):
        # Resolve the seed once so the value that is set and the value that
        # is reported always agree.  Indexing a scalar seed used to crash.
        seed = SEED[repeat] if isinstance(SEED, list) else SEED
        print("\n\n\n{0} Repeat: {1} {0}".format('-' * 27, repeat))
        set_seed(seed)
        print("\n\n\n{0}  Seed: {1}  {0}".format('-' * 27, seed))
        if params.mode == 0:
            # Start training
            trainer = Trainer(params, u)
            trainer.log_time['data_loading'] = timer() - s_t
            trainer.train()
            print(trainer.log_time)
            print("Total time taken (in seconds): {}".format(timer() - s_t))

            evaluator = Evaluator(params, u, dl)
            evaluator.evaluate()
        elif params.mode == 1:
            # Evaluate on the test set
            evaluator = Evaluator(params, u, dl)
            evaluator.evaluate()
        else:
            raise NotImplementedError("Unknown mode: {}".format(params.mode))

Beispiel #4

0

Datei anzeigen

class ResultGenerator:
    """Runs the prediction steps on test profiles and writes one XML file per user."""

    # Shared helper used for JSON reading and file-system output.
    utils = Utils()

    def generate_results(self,
                         test_data_path="../data/Public_Test/",
                         path_to_results="../data/results"):
        """Run the test data through the model(s) and write the XML files.

        Args:
            test_data_path: Directory containing ``Profile/Profile.csv``.
            path_to_results: Directory that receives the per-user XML files.
        """
        profile_dir = os.path.join(test_data_path, "Profile")
        profiles = pd.read_csv(os.path.join(profile_dir, "Profile.csv"))

        model_file = os.path.join(abs_path, "resources", "model.json")
        model = self.utils.read_json(model_file)

        df_results = generate_df_for_all_users(profiles, model)
        # Each step takes the running frame and returns the updated one; the
        # order below is the order in which they are applied.
        prediction_steps = (compute_gender, compute_age, compute_personality,
                            compute_ext, compute_neu)
        for step in prediction_steps:
            df_results = step(test_data_path, df_results)

        self.store_individual_xmls_into_results_path(
            path_to_results, self.generate_xml_from_profiles(df_results))

    @staticmethod
    def generate_xml_from_profiles(data_frame):
        """Render every row of ``data_frame`` as a ``<user ... />`` element.

        Values are inserted verbatim (no XML escaping).  ``userid`` must
        already be a string; the other fields are converted with ``str``.

        Returns:
            dict mapping each row's ``userid`` to its XML text.
        """
        # (XML attribute name, data-frame column) in output order.
        attribute_columns = (
            ("age_group", "age_group"),
            ("gender", "gender"),
            ("extrovert", "ext"),
            ("neurotic", "neu"),
            ("agreeable", "agr"),
            ("conscientious", "con"),
            ("open", "ope"),
        )
        xml_dictionary = {}
        for _, row in data_frame.iterrows():
            xml = "<user \n id = \"" + row["userid"] + "\" "
            for attribute, column in attribute_columns:
                xml += "\n " + attribute + " = \"" + str(row[column]) + "\" "
            xml += "/>"
            xml_dictionary[row["userid"]] = xml

        return xml_dictionary

    def store_individual_xmls_into_results_path(self, path_to_results,
                                                xml_dictionary):
        """Write each dictionary value to ``<path_to_results>/<key>.xml``."""
        self.utils.make_directory_if_not_exists(path_to_results)
        for user, xml in xml_dictionary.items():
            target = os.path.join(path_to_results, user + ".xml")
            self.utils.write_to_directory(target, xml)

Beispiel #5

0

Datei anzeigen

Datei: test_nltk.py Projekt: EdwinPuertas/ProofOfConcept

 def __init__(self):
     """Load the corpus and the Spanish stop-word list and reset the token buffers."""
     self.util = Utils()
     # get_corpus() returns a pair: the corpus and its word list.
     self.newcorpus, self.list_word = self.util.get_corpus()
     self.stopwords = stopwords.words('spanish')

     # Working buffers, empty until later processing fills them.
     self.list_stopwords = []
     self.token = []
     self.token_clean = []
     self.newcorpus_clean_token = ''

Beispiel #6

0

Datei anzeigen

Datei: main.py Projekt: Xueelei/CNN-for-Sentence-Classification

def main():
    """Build the data pipeline, run ``Evaluator.evaluate_CNN`` and print timings."""
    params = parse_arguments()
    started = timer()

    loader = DataLoader(params)
    helper = Utils(params, loader)
    evaluator = Evaluator(params, helper)

    # Everything up to this point is booked as data-loading time.
    evaluator.log_time['data_loading'] = timer() - started
    evaluator.evaluate_CNN()

    print(evaluator.log_time)
    elapsed = timer() - started
    print("Total time taken (in seconds): {}".format(elapsed))

Beispiel #7

0

Datei anzeigen

    def tree_request(self, endpoint):
        """GET ``endpoint`` and print every entry of the returned JSON collection.

        On HTTP 200 each top-level entry is printed as ``key:<TAB>value``
        lines, with ``Utils().clear_screen`` called between entries and
        fields.  Any other status prints an error message with the status
        code.
        """
        response = requests.get(endpoint)

        if response.status_code != 200:
            print('Behaviour not expected happend')
            print('Error Code: ', end=' ')
            print(response.status_code)
            return

        for entry in response.json():
            Utils().clear_screen(3)
            for field in entry:
                print(field, end=':\t')
                print(entry[field])
                Utils().clear_screen(1)

Beispiel #8

0

Datei anzeigen

    def simple_request(self, endpoint):
        """GET ``endpoint`` and print the returned JSON object field by field.

        On HTTP 200 every key is printed as ``key:<TAB>value`` followed by a
        ``Utils().clear_screen(1)`` call.  Any other status prints an error
        message with the status code.
        """
        launch = requests.get(endpoint)

        if launch.status_code == 200:
            # Decode the body once: each ``.json()`` call parses the whole
            # payload again, which the original did on every iteration.
            payload = launch.json()

            for item in payload:
                print(item, end=':\t')
                print(payload[item])
                Utils().clear_screen(1)

        else:
            print('Behaviour not expected happend')
            print('Error Code: ', end=' ')
            print(launch.status_code)

Beispiel #9

0

Datei anzeigen

    def get_data(self, labels=None, include_image=False):
        """Join the NRC and LIWC text features with the requested profile labels.

        Args:
            labels: Profile columns to keep.  Defaults to
                ``['userid', 'ope', 'con', 'ext', 'agr', 'neu']``.  The list
                must contain ``'userid'`` because it is the join key.
            include_image: When true, the frame returned by
                ``self.read_image()`` is joined in as well.

        Returns:
            The merged DataFrame with the ``userid`` column removed.
        """
        # A ``None`` sentinel replaces the former list default, so no mutable
        # object is shared between calls.
        if labels is None:
            labels = ['userid', 'ope', 'con', 'ext', 'agr', 'neu']

        util = Utils()
        profile_df = util.read_data_to_dataframe("../data/Train/Profile/Profile.csv")
        profile_df = profile_df.filter(labels, axis=1)

        nrc_df = util.read_data_to_dataframe("../data/Train/Text/nrc.csv")
        liwc_df = util.read_data_to_dataframe("../data/Train/Text/liwc.csv")
        # The text feature files spell the key 'userId'; align it with the profiles.
        nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
        liwc_df.rename(columns={'userId': 'userid'}, inplace=True)

        merged_df = pd.merge(nrc_df, liwc_df, on='userid')
        if include_image:
            merged_df = pd.merge(merged_df, self.read_image(), on='userid')
        merged_df = pd.merge(merged_df, profile_df, on='userid')
        # The key is only needed for joining, not as a feature.
        merged_df.drop(['userid'], axis=1, inplace=True)
        return merged_df

Beispiel #10

0

Datei anzeigen

def init(config_name=active_config_name()):
    """Create the Flask app, the bot and the helpers, and publish them as module globals.

    Rebinds the globals ``app``, ``config``, ``logger``, ``utils`` and
    ``telebot``, then registers extensions, blueprints and the bot inside
    the application context.

    Args:
        config_name: Key into ``app_config``.  NOTE: the default is computed
            once, when this ``def`` statement is executed, not on each call.

    Returns:
        The configured Flask application.
    """
    global app, config, logger, utils, telebot

    config = Config()

    app = Flask(__name__)
    selected_settings = app_config[config_name]
    app.config.from_object(selected_settings)

    telebot = TeleBot(config.TOKEN)
    utils = Utils(telebot)
    logger = config.create_logger(app)

    with app.app_context():
        register_extensions()
        register_blueprints()
        register_bot(config_name)

    return app

Beispiel #11

0

Datei anzeigen

def load_model_and_run():
    """Build the TensorRT pose model, benchmark it and open the camera.

    Steps: read ``human_pose.json``; build the resnet18 attention model and
    load its weights; convert it with ``torch2trt`` (fp16) and round-trip the
    converted weights through ``OPTIMIZED_MODEL`` on disk; run a fixed number
    of inferences on a zero tensor and print the resulting rate; finally
    create the CSI camera.

    Returns:
        Tuple ``(ut, camera, model_trt)``: the ``Utils`` built from the pose
        topology, the camera, and the loaded ``TRTModule``.
    """
    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)

    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    ut = Utils(topology)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])
    model = trt_pose.models.resnet18_baseline_att(num_parts,
                                                  2 * num_links).cuda().eval()
    MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    WIDTH = 224
    HEIGHT = 224

    # Dummy input used both for the conversion and for the benchmark below.
    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()
    model_trt = torch2trt.torch2trt(model, [data],
                                    fp16_mode=True,
                                    max_workspace_size=1 << 25)
    OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    # One constant drives both the loop and the rate so they cannot drift apart.
    BENCHMARK_RUNS = 50
    t0 = time.time()
    torch.cuda.current_stream().synchronize()
    for _ in range(BENCHMARK_RUNS):
        model_trt(data)
    # Wait for queued GPU work so the elapsed time covers the inferences.
    torch.cuda.current_stream().synchronize()
    t1 = time.time()

    print(float(BENCHMARK_RUNS) / (t1 - t0))

    # camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=15)
    camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=15)

    return ut, camera, model_trt

Beispiel #12

0

Datei anzeigen

def main():
    """Train (mode 0) or evaluate on the test set (mode 1); other modes do nothing."""
    params = parse_arguments()
    started = timer()
    loader = DataLoader(params)
    helper = Utils(params, loader)

    mode = params.mode
    if mode == 0:
        # Training run; everything before this point counts as data loading.
        trainer = Trainer(params, helper)
        trainer.log_time['data_loading'] = timer() - started
        trainer.train()
        print(trainer.log_time)
        print("Total time taken (in seconds): {}".format(timer() - started))
        return

    if mode == 1:
        # Evaluation on the test set.
        Evaluator(params, helper, loader).evaluate()

    # Any other mode: nothing implemented yet, fall through silently.

Beispiel #13

0

Datei anzeigen

    def read_liwc(profiles_path="../data/Train/Profile/Profile.csv",
                  liwc_path="../data/Train/Text/liwc.csv"):
        """Left-join the LIWC features onto the profiles and keep the known columns.

        Args:
            profiles_path: CSV with the user profiles.
            liwc_path: CSV with the LIWC features (key column ``userId``).

        Returns:
            DataFrame restricted to ``userid``, the LIWC feature columns and
            ``age``.  Profiles without LIWC data are kept (left join).
        """
        selected_columns = [
            'userid', 'WC', 'WPS', 'Sixltr', 'Dic', 'Numerals', 'funct',
            'pronoun', 'ppron', 'i', 'we', 'you', 'shehe', 'they', 'ipron',
            'article', 'verb', 'auxverb', 'past', 'present', 'future',
            'adverb', 'preps', 'conj', 'negate', 'quant', 'number', 'swear',
            'social', 'family', 'friend', 'humans', 'affect', 'posemo',
            'negemo', 'anx', 'anger', 'sad', 'cogmech', 'insight', 'cause',
            'discrep', 'tentat', 'certain', 'inhib', 'incl', 'excl', 'percept',
            'see', 'hear', 'feel', 'bio', 'body', 'health', 'sexual', 'ingest',
            'relativ', 'motion', 'space', 'time', 'work', 'achieve', 'leisure',
            'home', 'money', 'relig', 'death', 'assent', 'nonfl', 'filler',
            'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 'Exclam', 'Dash',
            'Quote', 'Apostro', 'Parenth', 'OtherP', 'AllPct', 'age',
        ]

        reader = Utils()
        profiles = reader.read_data_to_dataframe(profiles_path)
        liwc = reader.read_data_to_dataframe(liwc_path)
        # Align the key spelling with the profile file before joining.
        liwc.rename(columns={'userId': 'userid'}, inplace=True)

        joined = pd.merge(profiles, liwc, on='userid', how='left')
        return joined.filter(selected_columns, axis=1)

Beispiel #14

0

Datei anzeigen

 def read_image(profiles_path="../data/Train/Profile/Profile.csv",
                image_path="../data/Train/Image/oxford.csv"):
     """Return the Oxford face features for users that have a profile.

     Args:
         profiles_path: CSV with the user profiles.
         image_path: CSV with the face features (key column ``userId``).

     Returns:
         DataFrame with ``userid`` followed by the facial-landmark feature
         columns.  ``age`` is joined in from the profiles but is not part of
         the returned columns.
     """
     feature_columns = [
         'userid', 'faceRectangle_width', 'faceRectangle_height', 'faceRectangle_left', 'faceRectangle_top',
         'pupilLeft_x', 'pupilLeft_y', 'pupilRight_x', 'pupilRight_y', 'noseTip_x', 'noseTip_y', 'mouthLeft_x',
         'mouthLeft_y', 'mouthRight_x', 'mouthRight_y', 'eyebrowLeftOuter_x', 'eyebrowLeftOuter_y',
         'eyebrowLeftInner_x', 'eyebrowLeftInner_y', 'eyeLeftOuter_x', 'eyeLeftOuter_y', 'eyeLeftTop_x',
         'eyeLeftTop_y', 'eyeLeftBottom_x', 'eyeLeftBottom_y', 'eyeLeftInner_x', 'eyeLeftInner_y',
         'eyebrowRightInner_x', 'eyebrowRightInner_y', 'eyebrowRightOuter_x', 'eyebrowRightOuter_y',
         'eyeRightInner_x', 'eyeRightInner_y', 'eyeRightTop_x', 'eyeRightTop_y', 'eyeRightBottom_x',
         'eyeRightBottom_y', 'eyeRightOuter_x', 'eyeRightOuter_y', 'noseRootLeft_x', 'noseRootLeft_y',
         'noseRootRight_x', 'noseRootRight_y', 'noseLeftAlarTop_x', 'noseLeftAlarTop_y', 'noseRightAlarTop_x',
         'noseRightAlarTop_y', 'noseLeftAlarOutTip_x', 'noseLeftAlarOutTip_y', 'noseRightAlarOutTip_x',
         'noseRightAlarOutTip_y', 'upperLipTop_x', 'upperLipTop_y', 'upperLipBottom_x', 'upperLipBottom_y',
         'underLipTop_x', 'underLipTop_y', 'underLipBottom_x', 'underLipBottom_y', 'facialHair_mustache',
         'facialHair_beard', 'facialHair_sideburns', 'headPose_roll', 'headPose_yaw', 'headPose_pitch',
     ]

     util = Utils()
     profile_df = util.read_data_to_dataframe(profiles_path)
     # ``axis`` must be given by keyword: pandas 2.x no longer accepts it as
     # a positional argument to ``drop``.
     profile_df.drop(profile_df.columns.difference(['userid', 'age']), axis=1, inplace=True)

     image_df = util.read_data_to_dataframe(image_path)
     # Align the key spelling with the profile file before joining.
     image_df.rename(columns={'userId': 'userid'}, inplace=True)

     merged_df = pd.merge(image_df, profile_df, on='userid')
     return merged_df.filter(feature_columns, axis=1)

Beispiel #15

0

Datei anzeigen

class Main:
    """Interactive console menu for the SpaceX launches API.

    NOTE: the menu loop lives directly in the class body, so it starts
    running (and never finishes normally) while this ``class`` statement is
    being executed.
    """

    utils = Utils()
    api = AcessAPI()

    # Menu option -> (request method, endpoint URL).
    actions = {
        1: (api.simple_request,
            'https://api.spacexdata.com/v3/launches/next'),
        2: (api.simple_request,
            'https://api.spacexdata.com/v3/launches/latest'),
        3: (api.tree_request,
            'https://api.spacexdata.com/v3/launches/upcoming'),
        4: (api.tree_request,
            'https://api.spacexdata.com/v3/launches/past'),
    }

    while True:

        utils.print_menu()

        try:
            option = int(input('Insira a opção: '))
            utils.clear_screen(3)

            if option in actions:
                request, url = actions[option]
                request(url)
            else:
                print(
                    'Opção invalida. Por favor digite um numero válido(1, 2, 3, 4)'
                )
                utils.clear_screen(10)

        except ValueError:
            # Raised by int() for non-numeric input.
            print('Erro! Digite apenas o numero da sua opção')

Beispiel #16

0

Datei anzeigen

        ],
                       axis=1)
        df["age"] = df_age
        data = df.to_numpy()
        X = data[:, :-1]
        y = data[:, -1]
        clf = LogisticRegression(C=0.004832930238571752, penalty='l2')
        clf.fit(X, y)
        pickle.dump(clf, open("resources/LogisticRegressionAge_v2.sav", 'wb'))

    def fit_model_using_default_ica_rfe(self, clf, df):
        """Apply ``UTILS.apply_rfe`` to ``df`` and run the classifier check on the result.

        Relies on the module-level ``UTILS`` object, which is only created
        when the file is run as a script.  Earlier experiments (all
        features, FastICA with 10 components) are no longer executed here.

        Args:
            clf: Classifier handed to ``apply_rfe`` and to
                ``run_classifier_for_accuracy``.
            df: Input data frame.
        """
        reduced_df = UTILS.apply_rfe(df, clf, 130, 1)
        # NOTE(review): the message says 10 features while apply_rfe is
        # called with 130 - confirm which one is intended.
        print("model with 10 features rfe applied")
        self.run_classifier_for_accuracy(reduced_df, clf)


# Script entry point: build the merged data set and fit the age model.
if __name__ == "__main__":
    # Module-level helper; fit_model_using_default_ica_rfe reads it as a global.
    UTILS = Utils()
    # Only needed by the commented-out RFE experiment below.
    clf = LogisticRegression(C=0.004832930238571752, penalty='l2')
    CC = CombinedClassifier()
    df = CC.merge_images_piwc()
    # CC.fit_model_using_default_ica_rfe(clf, df)
    CC.predict_age_using_logistic_regression(df)

Beispiel #17

0

Datei anzeigen

Datei: run_advattackFGSM.py Projekt: qing0991/Bayesian-Adversarial-Learning

# Run configuration, set as attributes on ``params`` (created above this excerpt).
params.cnn = '50, 100, 150, 350'
params.locnet = '10,10,10'
params.locnet2 = None
params.locnet3 = None
params.st = True
params.resume = False
params.dropout = 0.5
params.use_pickle = True
params.save_loc = "."
params.outfile = 'gtsrb_kaggle.csv'
# Alternative training pickle, currently disabled:
#params.train_pickle = params.save_loc + '/train_balanced_preprocessed.p'
params.train_pickle = params.save_loc + '/train.p'
params.extra_debug = False

from util import Utils
utils = Utils()

# Full list of available methods ...
method_set = ['BayesWRM', 'IFGSM', 'FGSM', 'PGD', 'Bayes', 'ERM']


# ... immediately replaced: only 'ERM' is processed by the loop below.
method_set =['ERM']

for method in method_set:

    # The two Bayesian methods load five saved networks each, from
    # advtrained_models/<method>_<index>.model, and move them to the GPU.
    if method == 'BayesWRM' or method == 'Bayes':
        model_list = []
        for ii in range(5):
            net = IDSIANetwork(params)
            net.load_state_dict(torch.load(os.path.join('advtrained_models', method+'_'+str(ii)+'.model')))
            net.cuda()
            model_list.append(net)

Beispiel #18

0

Datei anzeigen

Datei: main.py Projekt: HuzaifaQayyum/twitter_bot

from driver import Driver
from login import Login
from followers import Followers
from home import Home
from logout import Logout
from util import Utils

# Module-level driver shared by main() and every page object it creates.
driver = Driver().driver
# Helper bound to the same driver; used further down to report failures.
util = Utils(driver)

def main():
    """Log in with the credentials defined here (empty placeholders by default).

    The follow, like-and-comment and logout steps are currently disabled.
    """
    username, password = "", ""

    session = Login(driver)
    session.login(username, password)
    # Followers(driver).follow('jannatmirza07')
    # Home(driver, username).like_and_comment()
    # Logout(driver).logout()

# Runs at import time (there is no __main__ guard).  Any Exception raised
# by main() is handed to the shared Utils error handler instead of propagating.
try:
    main()
except Exception as error:
    util.handle_error(error)

Beispiel #19

0

Datei anzeigen

 def __init__(self, driver, username):
     """Keep the driver and username and build a ``Utils`` helper on the driver."""
     self.driver, self.username = driver, username
     self.utils = Utils(driver)

Beispiel #20

0

Datei anzeigen

class ajkLoadDataAndInsert():
    """Scrapes listing and detail pages through proxies and stores the results.

    NOTE: the containers below are class attributes, so they are shared by
    all instances unless an instance rebinds them (``__init__`` rebinds
    ``user_agents`` and ``headers``).
    """
    # Cities to scrape; entries are read with the keys 'ajk_sec_pages',
    # 'ajk_sec_url', 'city_name' and 'city_id'.
    city_list = []
    user_agents = []
    headers = {}
    # Shared helper, created once when the class body is executed.
    utils = Utils()
    list_data = []
    # Candidate proxies; each entry is read with the key 'ip'.
    ips = []
    ipIndex = 0
    # Proxy currently in use.
    ip = {}
    # Passed to utils.get_active_ip when a replacement proxy is requested.
    PROXYNAME = 'ipProxy'
    COLUMENAME = 'active_ajk_sec'

    def __init__(self):
        """Create the logger, load the HTTP headers and open the MySQL helper.

        Database settings come from the ``DB`` section of the configuration
        returned by ``self.utils.pathToConfig()``.
        """
        self.Logger = Logger('getAjkData')
        self.user_agents = Headers().user_agents
        self.headers = Headers().headers
        self.cfg = self.utils.pathToConfig()

        db_host = self.cfg.get('DB', 'DBHOST')
        db_port = int(self.cfg.get('DB', 'DBPORT'))
        db_user = self.cfg.get('DB', 'DBUSER')
        db_password = self.cfg.get('DB', 'DBPWD')
        # The trailing 3 and 5 are passed through as-is; their meaning is
        # defined by Mysql - TODO confirm.
        self.mysql = Mysql(db_host, db_port, db_user, db_password, 3, 5)

    def load_detail_info_sec(self):
        self.Logger.Info(u'>>>>> 开始抓取详细数据 <<<<<')
        self.ip = self.ips[0]
        for city in self.city_list:
            for page in range(0, int(city['ajk_sec_pages'])):
                city_list_url = city['ajk_sec_url'].replace(
                    '?from=navigation',
                    'p' + str(int(page) + 1) + '/#filtersort')
                self.Logger.Info(u'>>>>> 开始抓取:' + city['city_name'] + '|url:' +
                                 str(city_list_url) + '|ip:' + self.ip['ip'] +
                                 '<<<<<')
                oneCityGetDown = True
                while oneCityGetDown:
                    try:
                        self.Logger.Info(u'>>>>> 使用ip:' + str(self.ip['ip']) +
                                         '<<<<<')
                        proxies = {
                            'http': self.ip['ip'],
                            'https': self.ip['ip']
                        }
                        head = self.headers
                        head['user-agent'] = random.choice(self.user_agents)
                        r = requests.get(city_list_url,
                                         timeout=10,
                                         proxies=proxies,
                                         headers=head)
                        time.sleep(random.random() * 10)
                        soup = BeautifulSoup(r.text, "html.parser")
                        title = soup.find('title').get_text()
                        if '二手房' in title:
                            self.Logger.Info(u'>>>>> ip:' +
                                             str(self.ip['ip']) + u'可用|' +
                                             title + '<<<<<')
                            list = soup.find(attrs={
                                'id': 'houselist-mod-new'
                            }).find_all('li')
                            for l in list[0:]:
                                oneDetailGetDown = True
                                while oneDetailGetDown:
                                    house_title = l.find(attrs={
                                        'class': 'house-title'
                                    }).find('a').attrs['title'].strip()
                                    price = l.find(attrs={
                                        'class': 'price-det'
                                    }).get_text().strip()
                                    try:
                                        detail_url = l.find(
                                            attrs={
                                                'class': 'house-title'
                                            }).find('a').attrs['href']
                                        self.Logger.Info(
                                            u'>>>>> 开始抓取:' + house_title +
                                            '|' + detail_url.split('view/')
                                            [1].split('?')[0] + '|ip:' +
                                            self.ip['ip'] + u'|数据<<<<<')
                                        proxies = {
                                            'http': self.ip['ip'],
                                            'https': self.ip['ip']
                                        }
                                        head['user-agent'] = random.choice(
                                            self.user_agents)
                                        r_detail = requests.get(
                                            detail_url.split('now_time')[0],
                                            timeout=10,
                                            proxies=proxies,
                                            headers=head)
                                        time.sleep(random.random() * 20)
                                        soup_detail = BeautifulSoup(
                                            r_detail.text, "html.parser")
                                        title_detail = soup_detail.find(
                                            'title').get_text()
                                        if '58安居客' in title_detail and '访问验证' not in title_detail:
                                            try:
                                                self.Logger.Info(
                                                    u'>>>>> 开始从列表页获取详情中需要的数据|'
                                                    + title_detail + '<<<<<')
                                                detail_dict = self.get_data(
                                                    soup_detail)
                                                detail_dict['city_id'] = city[
                                                    'city_id']
                                                detail_dict[
                                                    'city_name'] = city[
                                                        'city_name']
                                                detail_dict['source'] = 'ajk'
                                                detail_dict[
                                                    'house_id'] = detail_url.split(
                                                        'view/')[1].split(
                                                            '?')[0]
                                                detail_dict[
                                                    'link_url'] = detail_url.split(
                                                        '?')[0]
                                                detail_dict[
                                                    'title'] = house_title
                                                detail_dict[
                                                    'price'] = self.utils.str_to_num(
                                                        price)
                                                oneDetailGetDown = False
                                                self.insert_update_data(
                                                    detail_dict)
                                            except BaseException, e:
                                                self.Logger.Info(
                                                    u'>>>>> 从列表页获取详情中需要的数据出错' +
                                                    str(e) + '<<<<<')
                                        elif '可能被删除' in title_detail:
                                            self.Logger.Info(u'>>>>> 该链接失效|' +
                                                             title_detail +
                                                             '<<<<<')
                                            oneDetailGetDown = False
                                        else:
                                            self.Logger.Info(
                                                u'>>>>> ip for detail:' +
                                                str(self.ip['ip']) + u'不可用|' +
                                                str(title_detail) + '<<<<<')
                                            result_ip = self.utils.get_active_ip(
                                                self.ips, self.ip, self.Logger,
                                                self.PROXYNAME, self.mysql)
                                            self.ip = result_ip['active_ip']
                                            self.ips = result_ip['ips']
                                    except BaseException, e:
                                        self.Logger.Info(
                                            u'>>>>> ip for detail:' +
                                            str(self.ip['ip']) + u'不可用,超时|' +
                                            str(e) + '<<<<<')
                                        result_ip = self.utils.get_active_ip(
                                            self.ips, self.ip, self.Logger,
                                            self.PROXYNAME, self.mysql)
                                        self.ip = result_ip['active_ip']
                                        self.ips = result_ip['ips']
                            oneCityGetDown = False
                            self.Logger.Info(u'>>>>> ========== city:' +
                                             city['city_name'] + u'第' +
                                             str(int(page) + 1) + u'页' +
                                             u'抓取完成 ========== <<<<<')
                        else:

Beispiel #21

0

Datei anzeigen

 def __init__(self, driver):
     """Keep the driver and build the ``TwitterCommon`` and ``Utils`` helpers on it."""
     self.driver = driver
     common, helper = TwitterCommon(driver), Utils(driver)
     self.twitter_common = common
     self.utils = helper

Beispiel #22

0

Datei anzeigen

 def __init__(self, driver):
     """Keep the driver and build a ``Utils`` helper on it."""
     helper = Utils(driver)
     self.driver, self.utils = driver, helper

Beispiel #23

0

Datei anzeigen

	def __init__(self):
		"""Load the configuration, open the MySQL helper and announce the start.

		Database settings come from the ``DB`` section of the configuration
		returned by ``Utils().pathToConfig()``.
		"""
		self.cfg = Utils().pathToConfig()
		db_host = self.cfg.get('DB', 'DBHOST')
		db_port = int(self.cfg.get('DB', 'DBPORT'))
		db_user = self.cfg.get('DB', 'DBUSER')
		db_password = self.cfg.get('DB', 'DBPWD')
		# The trailing 3 and 5 are passed through as-is; their meaning is
		# defined by Mysql - TODO confirm.
		self.mysql = Mysql(db_host, db_port, db_user, db_password, 3, 5)
		print('start get ip')

Beispiel #24

0

Datei anzeigen

Datei: ipProxy.py Projekt: sherlockfeng/dataCapture

class ipProxy():
    """Collects proxy addresses from public listing sites and stores the usable ones."""

    # Class-level defaults; __init__ rebinds both on the instance.
    user_agents = []
    headers = {}
    # Shared helper, created once when the class body is executed.
    utils = Utils()

    def __init__(self):
        """Create the logger, load the HTTP headers and open the MySQL helper.

        Database settings come from the ``DB`` section of the configuration
        returned by ``self.utils.pathToConfig()``.
        """
        self.Loggers = Logger('ipProxy')
        self.user_agents = Headers().user_agents
        self.headers = Headers().headers
        self.cfg = self.utils.pathToConfig()

        db_host = self.cfg.get('DB', 'DBHOST')
        db_port = int(self.cfg.get('DB', 'DBPORT'))
        db_user = self.cfg.get('DB', 'DBUSER')
        db_password = self.cfg.get('DB', 'DBPWD')
        # The trailing 3 and 5 are passed through as-is; their meaning is
        # defined by Mysql - TODO confirm.
        self.mysql = Mysql(db_host, db_port, db_user, db_password, 3, 5)

    def get_ip_from_xici(self):
        """Scrape proxy addresses from xicidaili forever and store the working ones.

        Each pass downloads the listing page, builds ``ip:port`` candidates
        from the cells of the ``ip_list`` table, checks every candidate with
        ``self.utils.checkIpForAJK`` and hands the usable ones to
        ``self.insert_data``.  Failures are logged and the loop continues
        after a 10 second pause; the method never returns.

        Side effects: overwrites ``self.headers['user-agent']`` on every pass.
        """
        Loggers = Logger(special_log_file='getProxyXiCi')
        time_format = "%Y-%m-%d %H:%M:%S"
        ip_pattern = re.compile(r'^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$')
        # Valid port numbers 0-65535.  The 60000-64999 branch is
        # ``6[0-4]\d{3}``; it used to be ``\d{4}``, which rejected those
        # ports and accepted six-digit numbers instead.
        port_pattern = re.compile(
            r'^([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])$'
        )
        # NOTE(review): both handlers catch BaseException, so KeyboardInterrupt
        # and SystemExit are logged and swallowed too - confirm this is intended.
        while True:
            try:
                avalibleIpsOneWeb = []
                startGetIpTime = time.time()
                startGetIpTimeFormat = time.strftime(
                    time_format, time.localtime(time.time()))
                title = u'西祠代理'
                Loggers.Info('>>>>> ' + startGetIpTimeFormat + '|' + title +
                             u'|开始抓取ip <<<<<')
                url = 'http://www.xicidaili.com/nn/'
                # Same dict object as self.headers: the user agent set here
                # is visible to other users of self.headers.
                head = self.headers
                head['user-agent'] = random.choice(self.user_agents)
                try:
                    Loggers.Info('>>>>> ' + title + u'|开始请求url ' + url +
                                 ' <<<<<')
                    r = requests.get(url, timeout=10, headers=head)
                    soup = BeautifulSoup(r.text, "html.parser")
                    cells = soup.find('table', attrs={
                        'id': 'ip_list'
                    }).find_all('td')
                    strText = ''
                    ips = []
                    # An address cell is remembered; the next cell that looks
                    # like a port completes the ``ip:port`` candidate.
                    for cell in cells:
                        content = cell.get_text().strip()
                        if ip_pattern.match(content):
                            strText = content
                        if port_pattern.match(content):
                            strText = strText + ':' + content
                            ips.append(strText)
                    endGetIpTime = time.time()
                    endGetIpTimeFormat = time.strftime(
                        time_format, time.localtime(time.time()))
                    # The trailing literal is unicode so that Python 2 does
                    # not attempt an implicit ASCII decode of a byte string.
                    Loggers.Info('>>>>> ' + endGetIpTimeFormat + '|' + title +
                                 u'|结束抓取ip,共抓取' + str(len(ips)) + u'条 <<<<<')
                    Loggers.Info('>>>>> ' + endGetIpTimeFormat + '|' + title +
                                 u'|开始检查ip是否可用,抓取共耗时' +
                                 str(endGetIpTime - startGetIpTime) + ' <<<<<')

                    for ip in ips:
                        Loggers.Info(u'>>>>> 开始检查ip:' + str(ip) + ' <<<<<')
                        start = time.time()
                        if self.utils.checkIpForAJK(ip):
                            end = time.time()
                            avalibleIpsOneWeb.append({
                                'source': 'xici',
                                'ip': ip,
                                'time': str(end - start)
                            })
                            Loggers.Info('>>>>> ip:' + str(ip) + u' 可用<<<<<')
                        else:
                            Loggers.Info('>>>>> ip:' + str(ip) + u' 不可用<<<<<')
                    endCheckIpTime = time.time()
                    endCheckIpTimeFormat = time.strftime(
                        time_format, time.localtime(time.time()))
                    Loggers.Info('>>>>> ' + endCheckIpTimeFormat + '|' +
                                 title + u'|结束检查ip是否可用,检查共耗时' +
                                 str(endCheckIpTime - endGetIpTime) + ' <<<<<')
                    Loggers.Info('>>>>> ' + title + u'|成功率:' +
                                 str(len(avalibleIpsOneWeb)) + '-' +
                                 str(len(ips)) + ' <<<<<')
                    Loggers.Info('>>>>> ' + endCheckIpTimeFormat + '|' +
                                 title + u'|结束,抓取到' +
                                 str(len(avalibleIpsOneWeb)) + u'条可用ip,共耗时' +
                                 str(endCheckIpTime - startGetIpTime) +
                                 ' <<<<<')
                    # self.avalibleIps.append(avalibleIpsOneWeb)
                    self.insert_data(Loggers, avalibleIpsOneWeb)
                except BaseException as e:
                    Loggers.Error(u'>>>>> 请求url出错 ' + str(e) + '<<<<<')
            except BaseException as e:
                Loggers.Error(u'>>>>> 抓取ip循环出错 ' + str(e) + '<<<<<')
            time.sleep(10)

Beispiel #25

0

Datei anzeigen

        X = Personality.normalize(df)
        y = df[df.columns[-1:]]
        reg.fit(X, y)
        pickle.dump(reg, open("resources/LinearRegression_ext_v2.sav", 'wb'))

    @staticmethod
    def normalize(df):
        X = df.iloc[:, 0:-1]  # independent columns
        X = np.log(X + 1)
        X = (X - X.min()) / (X.max() - X.min())
        X.fillna(0, inplace=True)
        return X


# Script entry point: prepare the data for the extraversion ('ext') regression.
# NOTE(review): this excerpt may continue below the last visible line.
if __name__ == '__main__':
    # NOTE(review): ``util`` is not used in the lines visible here.
    util = Utils()
    PERSONALITY = Personality()
    # Request 'ext' as the only personality label.
    df = PERSONALITY.get_data(labels=['userid', 'ext'])
    # Keep the selected predictor columns, with the target 'ext' last.
    df = df.filter(
        ['positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust',
         'pronoun', 'ppron', 'i', 'we', 'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo', 'negemo', 'anx',
         'incl', 'work', 'death', 'assent', 'nonfl', 'Quote', 'Apostro', 'ext'], axis=1)
    reg = linear_model.LinearRegression()

    # Same preprocessing steps as Personality.normalize: log(x + 1), then
    # per-column min-max scaling, with undefined cells set to 0.
    X = df.iloc[:, 0:-1]  # independent columns
    X = np.log(X + 1)
    X = (X - X.min()) / (X.max() - X.min())
    X.fillna(0, inplace=True)

    # Target: the last column, kept as a one-column DataFrame.
    y = df[df.columns[-1:]]