Ejemplo n.º 1
0
def search_one_account_passage_by_id(id):
    """Fetch all saved passages (title, link, account id) for one official account.

    :param id: official account id to filter on
    :return: tuple of result rows on success, False on failure
    """
    # Create a cursor object on a fresh connection.
    db = connect()
    cursor = db.cursor()
    # Parameterized query instead of str.format to prevent SQL injection.
    sql = "SELECT title, passage_link, official_account_id FROM passage_link_list \
              WHERE  official_account_id = %s"
    try:
        cursor.execute(sql, (id,))
        results = cursor.fetchall()
        for row in results:
            print(row)
        cursor.close()
        db.close()
        return results
    except Exception as e:
        db.rollback()  # roll back on error
        logger.error(str(e))
        logger.warning("Failed to search the history passage.")
        return False
Ejemplo n.º 2
0
    def start_scraping(self):
        """Run the configured scraping jobs (profiles and/or tweets) over a process pool."""
        # Guard: nothing configured to scrape.
        if not (self.scrape_profiles or self.scrape_tweets):
            logger.warning(f'Nothing to do. Did you forget "profiles" or "tweets" instruction?')
            return None
        # Guard: no user names selected.
        if self.usersnames_df.empty:
            logger.warning(f'Nothing to do. Did you forget to set "all_users" or "users_list"? Or all users already exist?')
            return None
        n_workers = min(len(self.usersnames_df), self.n_processes)
        if self.scrape_profiles:
            self._populate_proxy_queue()
            print(self.usersnames_df)
            profile_args = [(name,) for name in self.usersnames_df['username']]
            with mp.Pool(processes=n_workers) as pool:
                pool.starmap(self.scrape_a_user_profile, profile_args)
        if self.scrape_tweets:
            self._populate_proxy_queue()
            if self.rescrape:
                # Rescrape mode: the dataframe carries per-user date ranges.
                tweet_args = [(name, begin, end)
                              for _, (name, begin, end) in self.usersnames_df.iterrows()]
            else:
                # Normal mode: use the session-wide date range from the config.
                tweet_args = [(name, scraping_cfg.session_begin_date, scraping_cfg.session_end_date)
                              for name in self.usersnames_df['username']]
            with mp.Pool(processes=n_workers) as pool:
                pool.starmap(self.scrape_a_user_tweets, tweet_args)
Ejemplo n.º 3
0
    def getindex(self):
        """Fetch the index page and return sha1(security_token + password) as hex, or False.

        On a network failure the proxy is swapped and the request retried; after
        3 failures the method gives up.
        """
        try:
            index_info = self.sess.get(self.indexurl,
                                       headers=self.header,
                                       timeout=5,
                                       proxies=self.proxy)
            index_html = index_info.text
        except Exception:
            # Request failed: count the failure and retry with a fresh proxy.
            self.count = self.count + 1
            logger.warning("代理ip不可用" + str(self.count) + "次")
            # Bug fix: the limit used to be checked only after a *successful*
            # fetch, so persistent failures recursed without bound (and a
            # success on the 4th try was wrongly discarded).
            if self.count >= 3:
                return False
            self.proxy = server.get_proxy()
            return self.getindex()

        pattern = re.compile(
            '[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}-')
        try:
            sec_str = pattern.findall(index_html)
            # Bug fix: findall() returns a list, never None; an empty list
            # means no security token was found on the page.
            if not sec_str:
                return False
            hash_str = sec_str[0] + self.password
            sha1 = hashlib.sha1()
            sha1.update(hash_str.encode('utf-8'))
            return sha1.hexdigest()
        except Exception:
            return False
Ejemplo n.º 4
0
def insert_wechat_account(official_account_name):
    """Insert a WeChat official account and return its generated id.

    :param official_account_name: display name of the official account
    :return: the account's id on success, False on failure
    """
    # Create a cursor object on a fresh connection.
    db = connect()
    cursor = db.cursor()
    sql = """INSERT INTO wechat_account_list (official_account_name) VALUES (%s)"""
    try:
        # Bug fix: the parameter must be a 1-tuple; '(x)' is just 'x', not a tuple.
        cursor.execute(sql, (official_account_name,))
        # Parameterized lookup instead of str.format (prevents SQL injection
        # through the account name).
        sql = "SELECT id FROM wechat_account_list \
                   WHERE  official_account_name = %s"
        cursor.execute(sql, (official_account_name,))
        db.commit()
        results = cursor.fetchall()
        cursor.close()
        db.close()
        official_account_id = results[0][0]
        return official_account_id
    except Exception as e:
        db.rollback()  # roll back on error
        logger.error(str(e))
        logger.warning("Failed to add a official account.")
        return False
Ejemplo n.º 5
0
def save_excel_file(path, dataframes, sheet_names=None, extension=".xlsx",
                    check_path=False, to_excel_kwargs=None, **options):
    """Write dataframes to disk.

    :param path: Excel file path  (to be checked before with 'save_file' function).
    :param dataframes: dataframe or list of dataframes
    :param sheet_names:  sheet name or ordered list of sheet names
    :param extension: Excel file extension
    :param check_path: check path with save_file
    :param to_excel_kwargs: keyword arguments for _write_excel function
    :param options: options for save_file function
    :return: path
    """
    if check_path:
        path = save_file(path, extension=extension, **options)
    if isinstance(dataframes, pd.DataFrame):
        dataframes = [dataframes]
    length = len(dataframes)
    if isinstance(sheet_names, str):
        sheet_names = [sheet_names]
    if sheet_names is not None and len(sheet_names) != length:
        # Bug fix: the original only warned but kept the mismatched list, so
        # zip() in _write_excel silently dropped sheets. Fall back to defaults
        # as the warning promises.
        logger.warning("Invalid sheet names! Default sheet names will be used.")
        sheet_names = None
    if sheet_names is None:
        sheet_names = ["Sheet{}".format(i + 1) for i in range(length)]
    to_excel_kwargs = {} if to_excel_kwargs is None else to_excel_kwargs
    n_path = _write_excel(Path(path), dataframes, sheet_names, **to_excel_kwargs)
    return n_path
Ejemplo n.º 6
0
def handle_file_error(err, func, path, args=None, kwargs=None,
                      pos_path=0, key_path=None, change_path_func=save_file,
                      title='', msg='', return_if_ignore=None):
    """If PermissionError when opening/saving file, propose to retry, change file path or cancel

    :param err: exception
    :param func: function to execute if the user wants to retry
    :param path: file path
    :param args: args to pass to func
    :param kwargs: kwargs to pass to func
    :param pos_path: position of the positional argument path in func (only if key_path is None)
    :param key_path: name of the keyword argument path in func (if None, positional argument is used)
    :param change_path_func: function to get a new path, with no positional argument and 'initialdir' keyword argument
    :param title: title of the error
    :param msg: message of the error
    :param return_if_ignore: return if Ignore option is selected
    :return:
    """
    logger.debug(err)
    args = args or []
    kwargs = kwargs or {}
    title = title or 'File error!'
    msg = msg or "Unknown error with file '{}'. \nOriginal error: {}".format(path, err)
    logger.warning('User action needed!')
    res = messagebox.askcustomquestion(title=title, message=msg,
                                       choices=["Retry", "Rename automatically", "Change file path",
                                                "Ignore", "Debug (developer only)", "Cancel"])
    if res == "Retry":
        # Re-run func with the same path.
        if key_path is not None:
            kwargs[key_path] = path
        else:
            args.insert(pos_path, path)
        return func(*args, **kwargs)
    # Consistency fix: chained as 'elif' like every other choice below.
    elif res == "Rename automatically":
        n_path = _handle_existing_file_conflict(path=path, overwrite='rename')
        if key_path is not None:
            kwargs[key_path] = n_path
        else:
            args.insert(pos_path, n_path)
        return func(*args, **kwargs)
    elif res == "Change file path":
        initialdir = Path(path).dirname if Path(path).dirname.exists else None
        if key_path is not None:
            kwargs[key_path] = change_path_func(initialdir=initialdir)
        else:
            args.insert(pos_path, change_path_func(initialdir=initialdir))
        return func(*args, **kwargs)
    elif res == "Ignore":
        logger.warning("Function ignored!")
        # Bug fix: the '{}' placeholders were never filled (missing .format arguments).
        logger.debug("Function '{}' with path '{}' ignored!".format(func, path))
        return return_if_ignore
    elif res == "Debug (developer only)":
        pdb.set_trace()
    elif res in [None, "Cancel"]:
        err = UnknownError if not isinstance(err, BaseException) else err
        logger.exception(err)
        raise err.__class__(err)
    else:
        raise TypeError("Bad return of function 'messagebox.askcustomquestion': '{}'".format(res))
Ejemplo n.º 7
0
 def rescrape_dead_periods(self, session_id=-1):
     """Switch the scraper into rescrape mode over previously failed periods."""
     self.rescrape = True
     self.usersnames_df = get_dead_tweets_periods(session_id=session_id)
     logger.warning(f'Rescraping following periods')
     print(self.usersnames_df)
     # Flip the sign of the session id to mark this as a rescrape session.
     self.session_id = -self.session_id
     self.scrape_tweets = True
     return self
Ejemplo n.º 8
0
 def _populate_proxy_queue(self):
     """Fill the proxy queue with usable proxies in random order.

     TODO: throw out every proxy with problems; reload fast based on ok/fail ratio.
     """
     proxy_df = get_proxies(max_delay=self.max_proxy_delay)
     # Shuffle the proxies, otherwise they always come out in the same order.
     proxy_df = proxy_df.sample(frac=1., replace=False)
     for _, proxy in proxy_df.iterrows():
         self.proxy_queue.put({'ip': proxy['ip'], 'port': proxy['port']})
     # Typo fixed in log message ('poulates' -> 'populated').
     logger.warning(f'Proxy queue populated. Contains {self.proxy_queue.qsize()} servers')
Ejemplo n.º 9
0
def _handle_existing_file_conflict(path: Path, overwrite='ask', backup=False, **kwargs) -> Union[Path, None]:
    """Handle conflict if a file already exist by opening adapted dialog.

    :param path: target path (project's custom Path type — exposes 'splitext',
                 'isfile', 'radix', 'ext' as properties — TODO confirm)
    :param overwrite: 'ask', 'overwrite'/True, 'rename'/False or 'ignore'
    :param backup: if True, copy the old file aside before overwriting
    :param kwargs: forwarded to save_file when renaming
    :return: final path, or None if the user cancelled
    """
    # overwrite 'ask': ask user to modify overwrite arg into 'overwrite' (Yes) or 'rename' (No) or return None (Cancel)
    if overwrite == 'ask':
        logger.warning('User action needed!')
        res = messagebox.askyesnocancel(title="File existing",
                                        message="File {} already exists.\nDo you want to overwrite it?"
                                                "\n\nIf you select 'No', the file will be "
                                                "renamed automatically.".format(path))
        if res is None:  # dialog cancelled/closed
            # NOTE(review): .format(path) fills nothing here — the message has no placeholder.
            logger.info("'Save file' operation cancelled by the user.".format(path))
            logger.debug("The path 'None' will be returned.")

            return None
        if res:
            overwrite = 'overwrite'
        else:
            overwrite = 'rename'

    # overwrite 'rename' or False: add '-i' at the end of the path to make it unique, where 'i' is an integer.
    if overwrite == 'rename' or overwrite is False:
        r_path, r_ext = path.splitext
        # def rename_method1(r_path, sep='-'):#todo
        ls_end = re.findall(r'-(\d+)$', r_path)
        if ls_end:  # if the path already ends by '-i', change end to 'i+1'
            end = ls_end[0]
            r_path = r_path[:-(len(end) + 1)]
            added_ending = "-{}".format(int(end) + 1)
        else:
            added_ending = "-1"
        n_path = r_path + added_ending + r_ext
        logger.debug("Path {} changed to {} (renaming)".format(path, n_path))
        # Re-enter save_file so the renamed path is checked for conflicts again.
        return save_file(n_path, overwrite=overwrite, backup=backup, **kwargs)

    # backup True: backup the old file
    if backup and path.isfile:
        # Timestamp + short uuid suffix keeps successive backups unique.
        suffix = datetime.datetime.now().strftime("-%Y-%m-%d_%H-%M_") + uuid.uuid4().hex[:5]
        try:
            shutil.copyfile(path, path.radix + suffix + path.ext)
        except (PermissionError, FileNotFoundError) as err:
            logger.exception(err)
            logger.error("Failed to backup previous configuration file.")

    # overwrite 'overwrite' or True: do not modify the path and make the old file writable to allow overwriting
    if overwrite == 'overwrite' or overwrite is True:
        logger.debug("File {} will be overwritten".format(path))
        _set_writable(path)

    # overwrite 'ignore': do nothing
    elif overwrite == 'ignore':
        pass
    # other case of overwrite: do nothing (same as 'ignore')
    else:
        # NOTE(review): despite the warning text, this branch does NOT call _set_writable.
        logger.warning("Unexpected argument 'overwrite'! File {} will be overwritten".format(path))
    return path
Ejemplo n.º 10
0
def get_random_ua():
    """Return a random User-Agent string, retrying up to 5 times on failure.

    :return: a User-Agent string, or None if all 5 attempts failed
    """
    # Bug fix: the original recursed with a local counter that was reset to 0
    # in every new call, so the retry limit never triggered (unbounded
    # recursion) and the result of the recursive call was discarded anyway.
    last_error = None
    for _attempt in range(5):
        try:
            ua = UserAgent(verify_ssl=False)
            return ua.random
        except FakeUserAgentError as e:
            last_error = e
    logger.warning(last_error)
Ejemplo n.º 11
0
 async def test_proxy(self, proxy):
     """Probe one proxy for high anonymity and adjust its score accordingly."""
     try:
         address = proxy.split('-')[1]
         if len(address) > 1:
             target = address.replace('https://', 'http://')
             if await self.is_high_anon(target):
                 # Reward a working, highly anonymous proxy.
                 self.redis.adjust_score(proxy, +1, key=self.key)
             else:
                 # Penalize a transparent or failing proxy.
                 self.redis.adjust_score(proxy,
                                         -self.minus_every_time,
                                         key=self.key)
     except CancelledError as e:
         logger.warning('proxy: %s, %s' % (proxy, e))
Ejemplo n.º 12
0
 def _handle_error(self,
                   flag,
                   e,
                   username,
                   proxy,
                   fail_counter,
                   period_begin_date=None,
                   period_end_date=None):
     """Log one scraping failure and record the outcome for the proxy used."""
     message = (f'{flag} | {username}, {period_begin_date}/{period_end_date}, '
                f'{proxy["ip"]}:{proxy["port"]}, '
                f'queue={self.proxy_queue.qsize()}, fail={fail_counter}')
     logger.warning(message)
     logger.warning(e)
     # Keep per-proxy success/failure statistics up to date.
     update_proxy_stats(flag, proxy)
Ejemplo n.º 13
0
def insert_account_passage_link(title, passage_link, official_account_id):
    """Insert a passage link unless the same link is already stored.

    :param title: passage title
    :param passage_link: URL of the passage (used for the duplicate check)
    :param official_account_id: owning account id
    :return: None on success, False on failure
    """
    cursor = db.cursor()
    # Parameterized query instead of str.format to prevent SQL injection
    # through the title/link values.
    sql = ("INSERT INTO passage_link_list(title, passage_link, official_account_id) "
           "select %s, %s, %s from DUAL where not exists "
           "(select title, passage_link, official_account_id from passage_link_list "
           "where passage_link = %s)")
    try:
        cursor.execute(sql, (title, passage_link, official_account_id, passage_link))
        db.commit()
    except Exception as e:
        db.rollback()  # roll back on error
        logger.error(str(e))
        logger.warning("新增公众号推文失败。")
        return False
Ejemplo n.º 14
0
    def _populate_proxy_queue(self):
        """Refresh proxy statistics and fill the queue, most reliable proxies first."""
        update_proxies_ratio()
        proxy_df = get_proxies()
        columns = [
            'datetime', 'ip', 'port', 'source', 'delay', 'blacklisted',
            'scrape_n_failed', 'scrape_n_used', 'scrape_n_used_total',
            'scrape_n_failed_total', 'last_flag', 'fail_ratio'
        ]
        # Sort by failure ratio so the best proxies are queued first.
        proxy_df.sort_values('fail_ratio', inplace=True)
        print(proxy_df[columns])
        for _, proxy in proxy_df.iterrows():
            self.proxy_queue.put({'ip': proxy['ip'], 'port': proxy['port']})
        # Typo fixed in log message ('poulated' -> 'populated').
        logger.warning(
            f'Proxy queue populated. Contains {self.proxy_queue.qsize()} servers'
        )
Ejemplo n.º 15
0
def search_one_account_passage_by_id(id):
    """Fetch all saved passages for one official account (module-level connection).

    :param id: official account id to filter on
    :return: tuple of result rows on success, False on failure
    """
    cursor = db.cursor()
    # Parameterized query instead of str.format to prevent SQL injection.
    sql = "SELECT title, passage_link, official_account_id FROM passage_link_list \
              WHERE  official_account_id = %s"
    try:
        cursor.execute(sql, (id,))
        results = cursor.fetchall()
        for row in results:
            print(row)
        return results
    except Exception as e:
        db.rollback()  # roll back on error
        logger.error(str(e))
        logger.warning("查询历史推文失败。")
        return False
Ejemplo n.º 16
0
def q_save_a_proxy(proxy):
    """Store a freshly discovered proxy with default (untested) statistics."""
    collection = get_collection()
    record = proxy
    # New proxies have not been tested yet: start them blacklisted with a
    # huge delay and zeroed counters.
    record.update({
        'delay': 999999,
        'blacklisted': True,
        'error_code': 0,
        'test_n_blacklisted': 0,
        'test_n_tested': 0,
        'scrape_success': True,
        'scrape_n_used': 0,
        'scrape_n_failed': 0,
        'scrape_n_used_total': 0,
        'scrape_n_failed_total': 0,
    })
    try:
        collection.insert_one(record)
    except DuplicateKeyError:
        logger.warning(f"Duplicate proxy: {proxy['ip']}:{proxy['port']}")
Ejemplo n.º 17
0
def choose_filedialog(dialog_type: str,
                      multiple_paths: bool = False,
                      return_on_cancellation: str = None,
                      behavior_on_cancellation: str = 'ignore',
                      initialdir: str = None,
                      filetypes: list = None,
                      title: str = None,
                      **kwargs) -> Union[tuple, Path]:
    """Open a filedialog window."""
    # Map the requested dialog type to the matching tkinter dialog function.
    if dialog_type == 'save':
        ask_func = filedialog.asksaveasfilename
    elif dialog_type == 'open':
        ask_func = (filedialog.askopenfilenames if multiple_paths
                    else filedialog.askopenfilename)
    elif dialog_type == 'open_dir':
        ask_func = filedialog.askdirectory
    else:
        msg = "Argument 'dialog_type' must be 'save, 'open' or 'open_dir'."
        logger.error(msg)
        raise ValueError(msg)

    # Sanitize the optional inputs before handing them to tkinter.
    if filetypes is None or not isinstance_filetypes(filetypes):
        filetypes = [("all files", "*.*")]
    if not isinstance(initialdir, str) or not Path(initialdir).isdir:
        initialdir = None
    if not isinstance(title, str):
        title = None

    # Ask the user for a path; directory dialogs take no 'filetypes' argument.
    logger.warning('User action needed!')
    extra_kwargs = {} if dialog_type == 'open_dir' else dict(filetypes=filetypes)
    path = ask_func(title=title, initialdir=initialdir, **extra_kwargs)
    if not path:  # nothing selected
        # raise an anomaly with flag behavior_on_cancellation ('ask', 'ignore', 'warning' or 'error').
        raise_no_file_selected_anomaly(flag=behavior_on_cancellation)
        return Path(return_on_cancellation)
    return Path(path)
Ejemplo n.º 18
0
 def add(self, proxy, name=PROXY_ORIGINAL, score=INITIAL_SCORE):
     """Add a proxy with the given (highest) score.

     :param score: default score value
     :param name: sorted-set key name
     :param proxy: proxy entry to add
     :return: result of the zadd, or None
     """
     ip_port = proxy.split('-')[1]
     if not self.pattern.match(ip_port):
         logger.warning('illegal proxy: %s' % proxy)
         return None
     if not self.db.zscore(name, proxy):
         # Unknown proxy: register it with the default score.
         return self.db.zadd(name, {proxy: score})
     logger.info('proxy %s already exists' % proxy)
     if int(self.db.zscore(name, proxy)) == 100:
         # Known proxy already at the maximum: reset it to the initial score.
         return self.db.zadd(name, {proxy: INITIAL_SCORE})
Ejemplo n.º 19
0
 async def is_proxy_valid(proxy, url=TEST_URL):
     """Request `url` through `proxy` and report whether a 2xx response came back."""
     headers = {'User-Agent': get_random_ua()}
     try:
         conn = aiohttp.TCPConnector(verify_ssl=False)
         async with aiohttp.ClientSession(headers=headers,
                                          connector=conn) as session:
             async with session.get(url, proxy=proxy, ssl=False) as resp:
                 status = resp.status
                 if not (200 <= status < 300):
                     logger.info('%s is invalid, code: %s' % (proxy, status))
                     return False
                 logger.info('%s is valid' % proxy)
                 return True
     except (ClientConnectionError, ClientHttpProxyError, TimeoutError,
             CancelledError, ClientProxyConnectionError, Exception) as e:
         logger.warning(e)
         return False
Ejemplo n.º 20
0
    def selectScore(self):
        """Return the user's score, scraping or refreshing the database as needed.

        Returns 2 as an error sentinel after more than two failed scrape attempts.
        """
        if (self.count >= 2):
            # More than two failures: give up and return the error sentinel.
            return 2

        # Load the stored score from the database.
        term = configfile.getConfig("term", "termStr")
        scoreInfo = self.session.query(Scores).filter_by(openid=self.openidStr,
                                                         termStr=term).first()
        # No score stored in the database yet.
        if (scoreInfo == None):
            logger.info("用户" + self.openidStr + "数据库中无成绩")
            # Scrape and insert the score (argument 0 means insert).
            TrueOrFalse = self.__updateScore(0)
            # If the insert failed:
            if (TrueOrFalse == False):
                # Increment the failure counter.
                self.count = self.count + 1
                logger.warning("用户" + self.openidStr + "已爬" + str(self.count) +
                               "次")
            # Retry via recursion; bounded by the self.count guard above.
            return self.selectScore()
        # The stored score exists but is stale.
        elif (self.__compareTime(scoreInfo.updateTime.date()) == False):
            logger.info("用户" + self.openidStr + "使用爬虫爬取成绩")
            # Refresh it with the scraper (argument 1 means update).
            TrueOrFalse = self.__updateScore(1)
            if (TrueOrFalse == False):
                # Today's score is unavailable and the update failed, but the
                # database still holds older data — fall back to that.
                logger.info("用户" + self.openidStr + "从数据库中取成绩")
                return scoreInfo.score
            else:
                # The database was refreshed successfully; re-read from it.
                return self.selectScore()
        else:
            # Stored score is fresh enough: serve it straight from the database.
            logger.info("用户" + self.openidStr + "从数据库中取成绩")
            return scoreInfo.score
Ejemplo n.º 21
0
def _write_excel(path, dataframes, sheet_names, index=False, **kwargs):
    """Save dataframes to an Excel workbook.

    :param path: output path
    :param dataframes: list of dataframes
    :param sheet_names: list of sheet names (same index as dataframes)
    :param index: if True, index names are exported
    :param kwargs: keyword arguments for pd.DataFrame.to_excel function
    :return: final output path
    """
    # 'isnone' looks like a property of the project's custom Path type — TODO confirm.
    if path is None or path.isnone:
        logger.warning('No path set to write data to Excel! No file has been created.')
        return None
    try:
        logger.debug("Trying to open file '{}'".format(path))
        with pd.ExcelWriter(path) as writer:
            logger.debug("File {} opened.".format(path))
            for df, sheet_name in zip(dataframes, sheet_names):
                if isinstance(df.columns, pd.MultiIndex):
                    index = True  # index must be True if MultiIndex columns, otherwise NotImplementedError is raised
                df.to_excel(writer, sheet_name=sheet_name, index=index, **kwargs)
                logger.debug("Sheet {} written.".format(sheet_name))
        logger.debug("File {} closed.".format(path))
    except PermissionError as err:
        # File locked (e.g. open in Excel): delegate to the interactive handler,
        # which may retry this function with a new path.
        logger.exception(err)
        path = handle_permission_error(err, func=_write_excel, path=path, args=[dataframes, sheet_names, index],
                                       kwargs=kwargs, change_path_func=save_file, handle_read_only_error=True)
    except ValueError as err:
        # pandas raises "No engine for filetype ..." for unsupported extensions.
        if str(err).startswith("No engine for filetype"):
            path = handle_bad_extension_error(err, func=_write_excel, path=path, args=[dataframes, sheet_names, index],
                                              kwargs=kwargs, change_path_func=save_file, extension=".xlsx")
        else:
            raise err
    except FileNotFoundError as err:
        # Missing parent directory: let the user pick another location.
        logger.exception(err)
        path = handle_file_not_found_error(err, func=_write_excel, path=path, args=[dataframes, sheet_names, index],
                                           kwargs=kwargs, change_path_func=save_file)
    return path
Ejemplo n.º 22
0
 async def is_high_anon(self, proxy):
     """Check whether `proxy` is highly anonymous (does not leak our real IP)."""
     check_url = ANON_CHECK_URL
     try:
         async with aiohttp.ClientSession() as session:
             async with session.get(check_url, proxy=proxy, ssl=False,
                                    timeout=15) as resp:
                 if not (200 <= resp.status < 300):
                     return False
                 payload = await resp.json()
                 if self.anon_check_url == ANON_CHECK_URL:
                     reported_ip = payload['origin']
                 else:
                     # Adjust the field name to whatever your own check
                     # endpoint returns.
                     reported_ip = payload['X-Forwarded-For']
                 # The proxy is high-anon only if our real IP is absent.
                 return self.real_ip not in reported_ip
     except (ClientConnectionError, ClientHttpProxyError, TimeoutError,
             CancelledError, ClientProxyConnectionError, Exception) as e:
         logger.warning('proxy: %s, %s' % (proxy, e))
         return False
Ejemplo n.º 23
0
 def start(self):
     """Launch profile and/or tweet scraping over a multiprocessing pool."""
     # Guard: nothing configured to scrape.
     if not (self.scrape_profiles or self.scrape_tweets):
         logger.warning(
             f'Nothing to do. Did you forget "profiles" or "tweets" instruction?'
         )
         return None
     # Guard: no user names selected.
     if self.usersnames_df.empty:
         logger.warning(
             f'Nothing to do. Did you forget to set "users_all" or "users_list"? Or all users already exist?'
         )
         return None
     pool_size = min(len(self.usersnames_df), self.n_processes)
     logger.info(
         f'Start Twitter Scraping. | n_processes={pool_size}, session_id={self.session_id}, '
         f'session_begin_date={self.session_begin_date}, session_end_date={self.session_end_date}, timedelta={self.timedelta}, missing_dates={self.missing_dates}'
     )
     if self.scrape_profiles:
         self._populate_proxy_queue()
         profile_args = [(name, )
                         for name in self.usersnames_df['username']]
         with mp.Pool(processes=pool_size) as pool:
             pool.starmap(self.scrape_a_user_profile, profile_args)
     if self.scrape_tweets:
         self._populate_proxy_queue()
         if self.rescrape:
             # Rescrape mode: each row carries its own begin/end dates.
             tweet_args = [(name, begin, end)
                           for _, (name, begin,
                                   end) in self.usersnames_df.iterrows()]
         else:
             # Normal mode: use the session-wide date range.
             tweet_args = [(name, self.session_begin_date,
                            self.session_end_date)
                           for name in self.usersnames_df['username']]
         with mp.Pool(processes=pool_size) as pool:
             pool.starmap(self.scrape_a_user_tweets, tweet_args)
Ejemplo n.º 24
0
def add_file_extension(path: Union[str, Path],
                       extension=None,
                       replace=False,
                       keep_existing=False,
                       force_add=False) -> Path:
    """Add a specific extension to 'path'.

    :param path: path
    :param extension: extension. None is considered as '' extension
    :param replace: replace instead of append extension
    :param keep_existing: add extension only if it doesn't already exist
    :param force_add: add extension even if the correct extension already exists (not recommended)
    :return Path object
    """
    result = Path(path)
    # Keep an already-present extension when the caller asked for it.
    if keep_existing and result.ext:
        if result.ext != FileExt(extension):
            logger.warning("Bad extension kept: {}".format(result.ext))
        return result
    # Swap the current extension for the requested one.
    if replace:
        return result.replace_ext(extension)
    # Already carries the requested extension — nothing to do unless forced.
    if not force_add and result.ext == FileExt(extension):
        return result
    return result.join_ext(extension)
Ejemplo n.º 25
0
 def on_closing(self):
     """Tear down the main window, asking the user first if a worker thread is alive."""
     worker = self.active_thread
     if worker and worker.is_alive():
         logger.warning("Process running in background!")
         logger.debug("Thread '{}: {}' still alive.".format(worker.name, worker))
         logger.debug(
             "number of active threads: {}, current thread: {}, main thread: {}."
             .format(threading.active_count(), threading.current_thread(),
                     threading.main_thread()))
         # Give the worker one second to finish on its own.
         worker.join(timeout=1)
         if worker.is_alive():
             # Still running after the grace period: let the user decide.
             if not self.ask_exit(worker.func_name):
                 logger.debug("Exit cancelled")
                 return False
     # Remove logger handler attached to tkinter frame
     self.logger_frame.quit()
     # Destroy the window
     self.destroy()
     logger.debug("Main window destroyed.")
     self.quit()
     logger.debug("Main window quited.")
     return True
Ejemplo n.º 26
0
def convert_dict_from_str(dico,
                          allow_multiple=True,
                          error='ignore',
                          drop_none=False,
                          drop_empty_iterable=False,
                          ascendant=False,
                          no_flag='ignore',
                          inplace=False,
                          duplicates='first',
                          **parser_cfg):
    """Convert a dict of str (generated from a file for example) to a typed dict.
    Simple types that can be recognised: str, bool, int, float, list, tuple.
    Custom classes: Reference, Path
    Advanced types (with 'auto' flag): expressions with dict, set, bytes, None and simple types.

    :param dico: dictionary-like object to convert with keys and values of type 'str'.
    :param allow_multiple: if True, multiple flags are allowed (applied from left to right)
    :param error: behavior on casting error. Possible values:
                    'ignore' (returns the initial string),
                    'drop' (returns None),
                    'error' (raise an error if casting fails),
                    'auto-conversion' (try to convert automatically; if it fails, returns the initial string)
    :param drop_none: if True, None values are dropped
    :param drop_empty_iterable: if True, empty iterable objects (list, tuple) are dropped
    :param ascendant: if True, the flags are applied from the last to the first
    :param no_flag: behavior if no flag found. 'ignore', 'error', 'drop', 'auto-conversion'
    :param inplace: if True, 'dico' is modified in place and None is returned
    :param duplicates: behavior if duplicates found. 'drop', 'first', 'last', 'rename', 'error'.
    :param parser_cfg: kwargs for _parse_key function
    :return: dictionary-like object (same type as 'dico'), or None when inplace is True

    # Simple test

    >>> test_dict = {"a": "without_flag", "@i-b": "1", "@f-c": "9.2", "@b-d": "", "@b-e": "5",  "@b-f": "False"}
    >>> convert_dict_from_str(test_dict)
    {'a': 'without_flag', 'b': 1, 'c': 9.2, 'd': False, 'e': True, 'f': False}

    # Numbers test

    >>> num_dict = {"@f-d1": "1.6", "@i-d2": "1.7", "@f-@i-d3": "1.8", "@i-@f-d4": "1.9", "d5": 2.0, "@ftwsdc-d6": "2 252,9"}
    >>> convert_dict_from_str(num_dict)
    {'d1': 1.6, 'd2': '1.7', 'd3': 1, 'd4': 1.9, 'd5': 2.0, 'd6': 2252.9}

    # Duplicates handling test

    >>> dup_dict = OrderedDict([("@s-overwritten", "value1"), ("overwritten", "value2"), ("@auto-overwritten", "value3")])
    >>> convert_dict_from_str(dup_dict, duplicates='rename')
    OrderedDict([('overwritten', 'value1'), ('overwritten_1', 'value2'), ('overwritten_2', 'value3')])
    >>> convert_dict_from_str(dup_dict, duplicates='first')
    OrderedDict([('overwritten', 'value1')])
    >>> convert_dict_from_str(dup_dict, duplicates='last')
    OrderedDict([('overwritten', 'value3')])

    # Date test

    >>> date_dict = {"@date-date": "2019-04-01", "@date-date2": "04-13-2018", "@date-date3": "13/04/2018"}
    >>> convert_dict_from_str(date_dict)
    {'date': Timestamp('2019-04-01 00:00:00'), 'date2': Timestamp('2018-04-13 00:00:00'), 'date3': Timestamp('2018-04-13 00:00:00')}
    >>> date_special_dict = {"@date-date_std": "04/11/2018", "@datedb-date_day_before": "04/11/2018"}
    >>> convert_dict_from_str(date_special_dict)
    {'date_std': Timestamp('2018-04-11 00:00:00'), 'date_day_before': Timestamp('2018-11-04 00:00:00')}

    # List test

    >>> list_dict = {"@auto-list1": "[18, 13]", "@l-list2": "[19, 13]", "@auto-list3": "[{(18, 13): 'a'}, 'end']"}
    >>> convert_dict_from_str(list_dict)
    {'list1': [18, 13], 'list2': ['19', '13'], 'list3': [{(18, 13): 'a'}, 'end']}

    # Auto conversion test

    >>> auto_dict = {"a": "True", "b": "False", "c": "None", "d": "[{(18, 13): 'a'}, 'end']", "e": '9.9', "f": 9.9}
    >>> convert_dict_from_str(auto_dict, no_flag="auto-conversion")
    {'a': True, 'b': False, 'c': None, 'd': [{(18, 13): 'a'}, 'end'], 'e': 9.9, 'f': 9.9}
    """
    # Keys are assumed to be lower case; duplicate keys are resolved
    # according to the 'duplicates' policy by _handle_duplicates.
    if dico is None:
        logger.warning("none as dict")
        return None
    # Build the converted mapping in an object of the same type as 'dico'
    result = type(dico)()
    for key, raw_value in dico.items():
        new_key, flags = _parse_key(key, **parser_cfg)
        if not allow_multiple:
            # Keep only the first (outermost) flag when multiples are disallowed
            flags = flags[0] if flags else None
        if not flags:
            flags = _no_flag_handling(key, no_flag=no_flag)
            if flags is None:
                # 'drop' behavior for keys without flags: skip the entry
                continue
        new_value = _multiple_item_conversion(
            raw_value,
            flags,
            error=error,
            drop_none=drop_none,
            drop_empty_iterable=drop_empty_iterable,
            ascendant=ascendant)
        # Discard None values only when drop_none is requested
        if new_value is None and drop_none:
            continue
        _handle_duplicates(result, new_key, new_value, duplicates, inplace=True)
    if not inplace:
        return result
    # In-place mode: replace the content of 'dico' and return None
    dico.clear()
    dico.update(result)
    return None
Ejemplo n.º 27
0
    def init(self, default_config: Union[dict, ConfigDict] = None, path: Union[str, Path] = None,
             auto_load: bool = True, default_section: str = DEFAULT_SECTION, section: str = None,
             conversion_dict: dict = None, force_load: bool = False, load_empty: bool = False,
             auto_cast: bool = False, write_flags: bool = None, ask_path: bool = True,
             search_in_default_config: bool = True, merge_default_how: str = 'right', **kwargs):
        """Initialize (or re-initialize) the configuration object. Cf. __init__.

        :param path: path of the current configuration file.
        :param default_config: default configuration dictionary-like object with two levels.
        Preferred type is ConfigDict.
        :param auto_load: if True, the configuration file is loaded at initialisation.
        :param default_section: string of the default section in the configuration file.
        :param section: current section of the current configuration.
        :param conversion_dict: conversion of string values into other types.
        :param force_load: argument for the load method.
        :param load_empty: if True, an empty configuration can overwrite an existing one.
        :param auto_cast: if True, the read configuration values are automatically converted
        to basic Python types.
        :param write_flags: if None, same as auto_cast.
        If True, flags are added to the written configuration keys to explicit value types.
        :param ask_path: if True and path is None, the configuration path is asked to the user,
        otherwise the path remains None. Actually, it is the argument of the open_file function.
        :param search_in_default_config: if True, the default configuration is automatically used
        when a key is not found in the current configuration. If a section which exists in the
        default config but not in the current config is accessed, it is created in the current
        config.  # TODO: confirm, feature marked 'todo' below
        :param merge_default_how: merge method for the load method.
        Default is 'outer' (both existing and new keys are kept, values are updated).
        :param kwargs: other keyword arguments (not used; a warning is logged if any are given)
        """
        # Check arguments: unexpected kwargs are tolerated but reported
        if kwargs:
            logger.warning("Keyword arguments '{}' are not valid.".format(kwargs))
        # self._check_args(path=path, default_config=default_config, auto_load=auto_load,
        #                  default_section=default_section, section=section, conversion_dict=conversion_dict,
        #                  force_load=force_load, auto_cast=auto_cast, write_flags=write_flags)

        # Fall back to an empty conversion dict on any non-dict value (including None)
        if not isinstance(conversion_dict, dict):
            conversion_dict = {}
        # Parameters
        self._auto_cast = auto_cast
        # if write_flags is not defined, write_flags is the same as auto_cast
        self._write_flags = auto_cast if write_flags is None else write_flags
        self._force_load = force_load
        self._load_empty = load_empty
        self._ask_path = ask_path
        self._search_in_default_config = search_in_default_config  # todo
        self._conversion_dict = conversion_dict
        # Load default config (deep-copied so the current config can diverge from it)
        self._default_config = ConfigDict(default_config)
        self._cfg = self.default_config.deepcopy()  # first current configuration, before load  # todo: needed?
        # Sections: fall back to DEFAULT_SECTION on any non-string value
        if not isinstance(default_section, str):
            default_section = DEFAULT_SECTION
        self._default_config.default_section = default_section  # default section of default config
        self._default_config.section = self._default_config.default_section if section is None else section
        self.default_section = default_section  # default section of current config
        self.section = self.default_section if section is None else section
        # Paths — NOTE(review): 'self.path = path' appears to go through a property that
        # populates self._path; confirm before reordering these two lines.
        self.path = path
        self._default_path = self._path.copy() or Path(path)  # if path is None, keep the first path
        # Load configuration from file
        if auto_load:
            self.load(merge_how=merge_default_how)
        self._init_count += 1
        logger.debug("Config initialized. Number of initialization(s): {}".format(self._init_count))
Ejemplo n.º 28
0
 def __init__(self, *args, **kwargs):
     """Initialize the Config object.

     Any positional or keyword arguments are ignored: a warning is logged
     instead of raising, so callers with stale signatures keep working.
     """
     if len(args) + len(kwargs) > 0:
         logger.warning("Config object doesn't take any argument.")
     # Cooperative initialization of the parent class, without arguments.
     super().__init__()
Ejemplo n.º 29
0
# Back up selected collections from the source MongoDB database into a
# date-stamped backup database: recreate each collection's indices, then
# copy every document. Collections already present in the backup are skipped.
collection_names = ['profiles', 'proxies', 'tweets']

source_database = 'twitter_database'
backup_database = f'twitter_database_backup_{datetime.now().date()}'

client = MongoClient()
source_db = client[source_database]
backup_db = client[backup_database]

for collection_name in collection_names:
    source_collection = source_db[collection_name]
    backup_collection = backup_db[collection_name]
    # Skip collections that already exist in the BACKUP database, so that a
    # re-run does not duplicate documents.
    if collection_name in backup_db.list_collection_names():
        logger.warning(f'{collection_name} already exists in {backup_db}')

    else:
        # Recreate the source collection's indices on the backup collection
        for name, index_info in source_collection.index_information().items():
            keys = index_info['key']
            # 'ns' and 'v' are server-generated metadata and must not be passed
            # to create_index; 'ns' is absent on MongoDB >= 4.4, so use pop()
            # with a default instead of del to avoid a KeyError.
            index_info.pop('ns', None)
            index_info.pop('v', None)
            index_info.pop('key', None)
            backup_collection.create_index(keys, name=name, **index_info)
            logger.info(f'Index {name} for {collection_name} created')
        # Copy documents one by one
        copied = 0
        for doc in source_collection.find({}):
            backup_collection.insert_one(doc)
            copied += 1
        logger.info(f'{copied} documents copied to backup of {collection_name}')
Ejemplo n.º 30
0
def raise_anomaly(flag="ask",
                  error=None,
                  title=None,
                  message=None,
                  use_messagebox=True):
    """Raise an 'anomaly' depending on the flag argument:
        - 'ignore': do nothing and return,
        - 'warning': show a warning (warning logger and tkinter messagebox),
        - 'error': raises an exception,
        - 'ask': ask the user either to ignore the anomaly or to raise an error.

    :param flag: must be 'ask', 'ignore', 'warning', 'error'
    :param error: Exception class to raise (defaults to UnknownError if not an exception)
    :param title: Title for message box and logger.
    :param message: Message for message box and error logger.
    :param use_messagebox: if True the tkinter messagebox will be used to show errors.
    :return: None for 'ignore' and 'warning'; never returns for 'error'
             (raises) or when the user cancels ('ask' exits the program).
    :raises TypeError: if flag is not a string.
    :raises ValueError: if flag is an unknown string.
    """
    # Incorrect flag type
    if not isinstance(flag, str):
        msg = "Incorrect type '{}' for flag argument which must be a string.".format(
            type(flag))
        logger.error(msg)
        raise TypeError(msg)

    # Fall back to a placeholder for missing/invalid title and message.
    # Bug fix: each field is now checked independently — previously a valid
    # title was overwritten whenever message was None. isinstance(None, str)
    # is False, so the explicit None checks were redundant as well.
    if not isinstance(title, str):
        title = "Unknown error."
    if not isinstance(message, str):
        message = "Unknown error."

    # Ask flag: let the user decide between ignoring and raising
    if flag == 'ask':
        res = messagebox.askyesnocancel(
            title=title,
            message="{}\n\nDo you want to continue "
            "the program otherwise (not recommended)?".format(message),
            default='no')
        if res is None:
            # 'Cancel' button: stop the program cleanly
            logger.debug("Anomaly 'ask' raised with title '{}' "
                         "and message '{}'.".format(title, message))
            logger.info("Program stopped by the user ('Cancel' button).")
            sys.exit(0)
        if res:
            flag = 'ignore'
        else:
            flag = 'error'
            use_messagebox = False  # message has already been shown.

    # Ignore flag
    if flag == 'ignore':
        logger.debug(
            "Anomaly 'ignore' raised with title '{}' and message '{}'.".format(
                title, message))
        return

    # Warning flag
    if flag == 'warning':
        # Bug fix: format string previously missed the closing quote after the
        # title placeholder ("title '{} and message").
        logger.warning(
            "Anomaly 'warning' raised with title '{}' and message '{}'.".format(
                title, message))
        if use_messagebox:
            messagebox.showwarning(title=title, message=message)
        return

    # Error flag
    if flag == 'error':
        logger.debug("Anomaly 'error' raised with title '{}'.".format(title))
        if not isinstance(error, BaseException):
            error = UnknownError
        if use_messagebox:
            messagebox.showerror(title=title, message=message)
        msg = '{}\n\n{}'.format(title, message)
        logger.error(msg)
        raise error(msg)

    # Unknown flag
    msg = "Incorrect value '{}' for flag argument which must be 'ask', 'ignore', 'warning' or 'error'. " \
          "Original error was: {}\n\n{}".format(flag, title, message)
    logger.error(msg)
    raise ValueError(msg)