コード例 #1
0
    def _create_task(self):
        if not self.results:
            time_lapsed = round(time.time() - self.time_start)
            if time_lapsed > self.results_wait:
                self.terminate_counter += 1
                logger.debug("Waiting to terminate {}/{}".format(
                              self.terminate_counter, self.termination_limit))
            return
        self._tasks_lock.acquire()
        task_results = self.results[:self.results_len]
        self.results = self.results[self.results_len:]
        try:
            result_ids, versions = zip(*task_results)
            result_ids = sorted(set(result_ids))
            model_version = min(versions)
            self.curr_task_id += 1
            task_name = f"task-{self.curr_task_id}-model-{model_version}"
            result_ids = [p for p in result_ids if os.path.exists(p)
                            and (os.path.basename(p) not in self.seen_results)]

            [self.seen_results.add(os.path.basename(p)) for p in result_ids]
            if not result_ids:
                self.terminate_counter += 1
                return
            with self.train_lock:
                task_id = self.cli.tasks_create(task_name, self.labels, result_ids)
                self.pending_tasks[task_id] = result_ids
            self.time_start = time.time()
        except (requests.exceptions.HTTPError,
                requests.exceptions.ConnectionError,
                requests.exceptions.RequestException) as e:
            self._tasks_lock.release()
            logger.critical(e)
コード例 #2
0
ファイル: api_utils.py プロジェクト: louistransfer/kajin
def authenticate(email, password):
    """Log in to the Jinka API and prepare headers for follow-up requests.

    Posts the credentials as form data to the auth endpoint using a new
    ``requests.Session``.

    Returns:
        ``(session, headers)`` when the endpoint answers with HTTP 200, where
        ``headers`` carries the bearer token taken from the JSON response;
        ``(None, None)`` for any other status code.
    """
    credentials = {'email':email, 'password':password}
    session = requests.Session()
    response = session.post('https://api.jinka.fr/apiv2/user/auth', credentials)

    # Guard clause: anything other than 200 is treated as a failed login.
    if response.status_code != 200:
        logger.critical(f'Authentification failed with error {response.status_code}')
        return None, None

    logger.info('Authentification succeeded (200)')
    token = response.json()['access_token']

    # Browser-like header set; only the Authorization entry is dynamic.
    request_headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36',
        'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {token}',
        'Origin': 'https://www.jinka.fr',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Sec-GPC': '1',
        'If-None-Match': 'W/f46-qWZd5Nq9sjWAv9cj3oEhFaxFuek',
        'TE': 'Trailers',
    }
    return session, request_headers
コード例 #3
0
    def translate(
        self,
        source: str,
        from_lang: Optional[str] = None,
        to_lang: Optional[str] = None,
        proxy: Optional[str] = None,
    ) -> str:
        """Translate ``source`` chunk by chunk and return the joined result.

        Args:
            source: Text to translate.
            from_lang: Source language; the instance's ``from_lang`` is used
                when ``None``.
            to_lang: Target language; the instance's ``to_lang`` is used
                when ``None``.
            proxy: Forwarded unchanged to ``self._get_translation``.

        Returns:
            ``source`` itself when both effective languages are equal,
            otherwise the chunk translations joined with single spaces.

        Raises:
            QuotaError: If the joined result starts with
                ``'MYMEMORY WARNING:'``.
            Exception: Anything raised by ``self._get_translation`` is logged
                and re-raised unchanged.

        The effective languages are stored on ``self.from_lang_`` and
        ``self.to_lang_`` as a side effect.
        """
        self.from_lang_ = self.from_lang if from_lang is None else from_lang
        self.to_lang_ = self.to_lang if to_lang is None else to_lang

        if self.to_lang_ == self.from_lang_:
            return source

        # Decide ONCE, on the effective target language, whether the Chinese
        # workaround applies, so that appending the sentinel and stripping it
        # again always happen together.
        to_chinese = self.to_lang_ == 'zh'

        if to_chinese:
            # A trailing "." is not converted to the Chinese full stop, so a
            # sentinel tail is appended and cut off again after translation.
            # Chunks are shorter here (160) than for other targets (500);
            # the original comment notes a Chinese char is about 3 bytes.
            source_list = wrap(source + ' . _xx',
                               160,
                               replace_whitespace=False)
        else:
            # MAX ALLOWED QUERY : 500 bytes/chars
            source_list = wrap(source, 500, replace_whitespace=False)

        try:
            seq = ' '.join(self._get_translation(elm, proxy=proxy)
                           for elm in source_list)
        except Exception as exc:
            logger.warning(" seq = ' '.join, exc %s ", exc)
            raise

        if seq.startswith('MYMEMORY WARNING:'):
            logger.critical("MYMEMORY WARNING: %s", seq)
            raise QuotaError(seq)

        if to_chinese:
            # Drop the translated sentinel tail appended above.
            return seq[:-5]

        return seq
コード例 #4
0
 def connect(self):
     """Open a netmiko session to ``self.ip`` or terminate the program.

     On success the connection handler is stored in ``self.connect``.
     Authentication and timeout errors are logged as warnings; if no
     connection was established a critical message is logged and
     ``sys.exit()`` is called.
     """
     # NOTE(review): assigning to ``self.connect`` replaces this bound method
     # on the instance, so ``connect()`` cannot be called a second time on
     # the same object.
     self.connect = False
     try:
         print('~'*100 + f'\nConnecting to device: {self.ip}')
         handler = ConnectHandler(
             ip=self.ip,
             device_type='cisco_ios',
             username=self.username,
             password=self.password,
         )
         self.connect = handler
     except (NetMikoAuthenticationException, NetMikoTimeoutException) as conn_error:
         logger.warning(f'Unable to connect to device:\n{conn_error}')
     if self.connect:
         return
     logger.critical('No devices to connect to')
     sys.exit()
コード例 #5
0
def foo():
    """Emit one message at every log level, 99 times, into a log file.

    Registers ``/home/kaktus74/nasze_logi.log`` as the log file and then,
    for ``i`` from 1 to 99, logs a debug, info, warning, error and critical
    message in that order.
    """
    logfile("/home/kaktus74/nasze_logi.log")

    rok = 19  # constant used in the critical message below
    i = 1
    while i < 100:
        logger.debug("SUper szczegolowe informacje")
        logger.info("Cos siw wydarzylo!")
        logger.warning("Chyba cos sie wywrocilo")
        logger.error("Oj, cos sie na pewno wywrocilo!")
        logger.critical(f"Wykryto wirusa covid-{rok}! {i}")
        i += 1
コード例 #6
0
    def include_all_items_in_live(self):
        """Mark every excluded item time of the plan as included again.

        If some items are "excluded" from the live plan, it will break the
        service planner app's item order. Run this to make all items
        "included" in the live plan.

        Fetches the plan's items together with their item times, collects the
        item times whose ``exclude`` attribute is ``True`` and PATCHes each of
        them with ``exclude: False``. One log line is written per PATCH:
        critical when the status code is not 200, info otherwise.
        """
        response = requests.get(
            f'https://api.planningcenteronline.com/services/v2/service_types/'
            f'{self.service_type}/plans/{self.plan_id}/items/?&include=item_times',
            auth=(APP_ID, SECRET))
        body = response.json()

        # One dict per excluded item time, holding the ids needed to address
        # /service_types/#/plans/#/items/#/item_times/id
        excluded = [
            {
                'item_id': entry['relationships']['item']['data']['id'],
                'time_id': entry['id'],
            }
            for entry in body['included']
            if entry['attributes']['exclude'] is True
        ]
        if not excluded:
            return

        request_headers = {
            'Content-type': 'application/json',
            'Accept': 'text/plain'
        }
        payload = {
            "data": {
                "type": "ItemTime",
                "attributes": {
                    "exclude": False,
                },
            }
        }

        for item in excluded:
            patch_response = requests.patch(
                f'https://api.planningcenteronline.com/services/v2/service_types/'
                f'{self.service_type}/plans/{self.plan_id}/items/{item["item_id"]}/item_times/{item["time_id"]}',
                headers=request_headers,
                data=json.dumps(payload),
                auth=(APP_ID, SECRET))
            if patch_response.status_code == 200:
                logger.info(
                    'pco_plan.include_all_items_in_live: updated live item to be included in plan: %s',
                    item)
            else:
                logger.critical(
                    'pco_plan.include_all_items_in_live: ERROR including item time %s',
                    item)
コード例 #7
0
ファイル: first.py プロジェクト: prudecki/netmiko-gitlab-p3
def devices_connect(devices):
    """Open a netmiko session to every reachable device.

    Args:
        devices: Iterable of device addresses passed as ``ip`` to
            ``ConnectHandler``. Credentials come from the module-level
            ``username`` and ``password``.

    Returns:
        List of the connection handlers that could be opened, in the order
        of ``devices``. Devices that fail with an authentication or timeout
        error are logged as warnings and skipped.

    If not a single connection could be opened, a critical message is logged
    and the program exits via ``sys.exit()``.
    """
    # A list, not the former ``False``: item assignment on a bool raised
    # TypeError as soon as the first device connected.
    connections = []
    for device in devices:
        print('~'*100 + f'\nConnecting to device: {device}')
        try:
            connections.append(
                ConnectHandler(ip=device, device_type='cisco_ios',
                               username=username, password=password))
        except (NetMikoAuthenticationException, NetMikoTimeoutException) as conn_error:
            logger.warning(f'Unable to connect to device:\n{conn_error}')
    if not connections:
        logger.critical('No devices to connect to')
        sys.exit()
    return connections
コード例 #8
0
ファイル: api_utils.py プロジェクト: louistransfer/kajin
def remove_expired(session, df, last_deleted_path):
    """Report expired offers to the API and return the non-expired rows.

    Args:
        session: Object whose ``post`` method is used to send the reports.
        df: DataFrame with an ``expired_at`` and an ``alert_id`` column; the
            index value of each row is sent as ``ad_id``.
        last_deleted_path: Destination for a JSON dump of the expired rows.

    Returns:
        The rows of ``df`` whose ``expired_at`` is null.

    More than 15 expired rows is treated as a slicing error: a critical
    message is logged and ``exit()`` is called.
    """
    expired_rows = df.loc[df["expired_at"].notna(), :]
    expired_count = len(expired_rows)
    if expired_count>15:
        logger.critical('Df slicing error')
        exit()
    logger.info('Starting the cleaning of expired offers.')
    for ad_id, offer in tqdm(expired_rows.iterrows()):
        report_url = 'https://api.jinka.fr/apiv2/alert/' + offer['alert_id'] + '/abuses'
        session.post(report_url, data={'ad_id':ad_id, 'reason':'ad_link_404'})
    # Keep a record of what was just reported.
    expired_rows.to_json(last_deleted_path, orient='columns')
    remaining = df.loc[df['expired_at'].isna(), :]
    logger.info(f'Finished cleaning the {len(expired_rows)} expired appartments.')
    return remaining
コード例 #9
0
 def download_template(self):
     """Fetch the router template from GitLab into ``self.template``.

     Returns:
         ``True`` when the file was downloaded (HTTP 200); its text is stored
         line by line in ``self.template``. ``False`` when the request raised
         a ``requests`` exception or answered with another status code.
     """
     # NOTE(review): the access token is hard-coded in source -- consider
     # moving it to configuration.
     logger.debug('trying to connect to GITLAB')
     try:
         response = requests.get(
             'http://172.17.0.3/api/v4/projects/1/repository/files/router/raw?ref=master',
             headers={'PRIVATE-TOKEN': 'x6sP6xf57gb5sxxiXutq'},
         )
     except requests.exceptions.RequestException as req_error:
         logger.critical(f'unable to connect to GITLAB:\n{req_error}')
         return False
     if git_ok := (response.status_code == 200):
         logger.debug('connection to GITLAB successful')
         self.template = response.text.splitlines()
     else:
         logger.info('Problem with downloading the template')
     return git_ok
コード例 #10
0
def readConfig():
    """Load ``config.yml`` from the working directory and return its content.

    Returns:
        Whatever ``load`` produces for the file (parsed with ``FullLoader``).

    If the file does not exist, the failure is logged and printed and the
    program exits with status 1.
    """
    path = "config.yml"

    try:
        with open(path, "r") as stream:
            settings = load(stream, Loader=FullLoader)
            log.debug("Read config file {0}, got {1}".format(
                path, settings))
            return settings

    except FileNotFoundError:
        log.critical("Can't read {0}!".format(path))
        # User-facing message (German): "Could not load the config file!"
        print("Konnte die Configdatei nicht laden!")
        log.info("Program end.")
        sys.exit(1)
コード例 #11
0
ファイル: stream.py プロジェクト: cionkubes/workq
 async def connect_retry(self):
     """Keep trying to connect to ``self.address`` until it succeeds.

     Each attempt uses a fresh non-blocking TCP socket: after a failed
     connect the state of a socket is unspecified, so the failed socket is
     closed instead of being reused. Between attempts the coroutine sleeps
     for ``self.retry_timeout`` seconds.

     Returns:
         The connected, non-blocking socket.
     """
     while True:
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         sock.setblocking(False)
         try:
             await self.loop.sock_connect(sock, self.address)
         # ConnectionRefusedError, ConnectionAbortedError and socket.gaierror
         # are all OSError subclasses, so this covers the original tuple.
         except OSError:
             sock.close()
             logger.critical(
                 f"Connect call to {self.address[0]}:{self.address[1]} failed, retrying in {self.retry_timeout} "
                 "second(s).")
             await asyncio.sleep(self.retry_timeout)
         else:
             logger.info(
                 f"Connected to {self.address[0]}:{self.address[1]}.")
             return sock
コード例 #12
0
    async def connect(self):
        """Connect to the database, retrying until it works.

        On success the connection is stored in ``self.conn``, its underlying
        socket is printed and passed to ``set_keepalive``. A driver error is
        logged as critical and followed by a pause of ``self.retry_timeout``
        seconds before the next attempt. Afterwards
        ``self.connection_available`` is set and immediately cleared.
        """
        established = False
        while not established:
            try:
                self.conn = await r.connect(self.addr, self.port)
                # Reaches into the driver's private stream writer to get at
                # the raw socket.
                writer = self.conn._instance._streamwriter
                sock = writer.get_extra_info('socket')
                print(sock)
                set_keepalive(sock)
                established = True
            except r.ReqlDriverError:
                logger.critical(
                    f"Failed to connect to database, retrying in {self.retry_timeout} seconds."
                )
                await asyncio.sleep(self.retry_timeout)

        # Pulse the event: set it, then reset it right away.
        self.connection_available.set()
        self.connection_available.clear()
コード例 #13
0
ファイル: orchestrator.py プロジェクト: cionkubes/workq
        def receive(message):
            """Dispatch one incoming message to the handler for its type.

            Messages with a type outside ``Types.all`` are logged as critical
            and dropped; known types without an entry in ``dispatch_table``
            are logged at debug level and dropped. Otherwise the handler
            coroutine is scheduled with ``asyncio.ensure_future``.

            Any exception raised while doing so is logged with its traceback
            and swallowed, so one bad message cannot break the receiver.
            """
            try:
                # Named ``msg_type`` so the builtin ``type`` is not shadowed.
                msg_type = message[Keys.TYPE]

                if msg_type not in Types.all:
                    logger.critical(f"Unknown message type {msg_type}")
                    return

                if msg_type not in dispatch_table:
                    logger.debug(
                        f"Message type {msg_type} does not have an handler.")
                    return

                handler = dispatch_table[msg_type]
                asyncio.ensure_future(handler(self, stream, message))
            except Exception:
                # ``logger.exception`` (previously misspelled, which raised
                # AttributeError from inside this handler) records the
                # active traceback.
                logger.exception("Unexpected exception in message handling")
コード例 #14
0
 def __prepare(self, infile, cities):
     """Yield a ``Hotel`` for every usable row of the CSV file ``infile``.

     ``cities`` is turned into a ``Cities`` object via ``Cities.from_json``
     when truthy; otherwise an empty ``Cities()`` is used.

     A row is skipped when its ``ta`` value is null, when ``hotel_URL`` is
     null, or when ``__parse_hcom_id`` raises ``AttributeError``. A
     ``KeyError``, ``IndexError`` or ``TypeError`` raised while resolving the
     city or building the ``Hotel`` is logged and the row is dropped.
     """
     scripture = MongoClient(settings.MONGO).scripture
     # NOTE(review): ``error_bad_lines`` is presumably meant to skip
     # malformed CSV lines; newer pandas versions no longer accept this
     # keyword -- confirm against the pinned pandas version.
     df = pandas.read_csv(infile, error_bad_lines=False)
     cities = cities and Cities.from_json(cities) or Cities()
     cities.hub = MongoClient(settings.HUB_MONGO).hub
     for idx, row in df.iterrows():
         isnull = row.isnull()
         if isnull.get('ta'):
             continue
         # Supplier ids: None when the column is absent or null. Because of
         # the trailing ``or None``, a falsy converted value (0 or '') also
         # becomes None.
         bonotel = not isnull.get('bonotel', True) and \
             int(row.bonotel) or None
         roomsxml = not isnull.get('roomsxml', True) and \
             int(row.roomsxml) or None
         hotelspro = not isnull.get('hotelspro', True) and \
             str(row.hotelspro) or None
         hotelbeds = not isnull.get('hotelbeds', True) and \
             int(row.hotelbeds) or None
         jactravel = not isnull.get('jactravel', True) and \
             int(row.jactravel) or None
         ta = row.get('ta')
         hcom = row.get('hotel_URL')
         if pandas.isnull(hcom):
             continue
         try:
             hcom_id = self.__parse_hcom_id(hcom)
         except AttributeError:
             continue
         try:
             # The city may be stored under an English or a Chinese column
             # header.
             _city = row.get('city') or row.get('城市')
             city_name = self._CITY_EN_RE.match(_city).group(1)
             city = cities.name(city_name.strip())
             yield Hotel(
                 db=scripture,
                 hotel_id=roomsxml,
                 hotels_cn_id=hcom_id,
                 bonotel=bonotel,
                 hotelspro=hotelspro,
                 hotelbeds=hotelbeds,
                 jactravel=jactravel,
                 comments_url=ta,
                 city=city
             )
         except (KeyError, IndexError, TypeError) as e:
             # NOTE(review): a non-matching city string makes ``match``
             # return None, which raises AttributeError -- that is not in
             # this tuple and would propagate out of the generator.
             logger.critical('hcom(%s), city(%s)', hcom_id, _city)
             logger.exception(e)
コード例 #15
0
    def _check_status(self):
        with self.train_lock:
            task_ids = self.pending_tasks.keys()
            if not task_ids:
                return
            try:
                for i in self._get_completed_tasks(task_ids):
                    task_id, image_ids = self.cli.tasks_dump(i)
                    self._add_train(task_id, image_ids)
                    # updating task start time to prevent a new task
                    # creation immediatelly after releasing lock
                    self.time_start = time.time()
                    self._tasks_lock.release()

            except (requests.exceptions.HTTPError,
                    requests.exceptions.ConnectionError,
                    requests.exceptions.RequestException) as e:
                logger.critical(e)
コード例 #16
0
def interrupt_experiment_on_unhealthy_probe(probe: Probe,
                                            run: Run,
                                            configuration: Configuration,
                                            secrets: Secrets = None) -> None:
    """End the experiment when a probe's output is out of tolerance.

    Does nothing when the experiment has already finished. Otherwise the
    run's ``output`` is checked against the probe's ``tolerance``; on the
    first failed check ``guardian.interrupted`` is set and, unless the
    experiment finished in the meantime, the event is logged as critical and
    ``exit_gracefully()`` is called.

    Args:
        probe: Probe declaration; its ``tolerance`` and ``name`` are read.
        run: Result of running the probe; its ``output`` is checked.
        configuration: Forwarded to ``within_tolerance``.
        secrets: Forwarded to ``within_tolerance``. Defaults to ``None``
            (previously the ``Secrets`` type itself was the default value,
            the result of writing ``=`` where ``:`` was intended).
    """
    if experiment_finished.is_set():
        return

    tolerance = probe.get("tolerance")
    checked = within_tolerance(tolerance,
                               run["output"],
                               configuration=configuration,
                               secrets=secrets)
    if not checked and not guardian.interrupted:
        guardian.interrupted = True
        # Re-check: the experiment may have completed while the tolerance
        # was being evaluated.
        if not experiment_finished.is_set():
            logger.critical(
                "Safeguard '{}' triggered the end of the experiment".format(
                    probe["name"]))
            exit_gracefully()
コード例 #17
0
ファイル: main.py プロジェクト: louistransfer/kajin
def run_all(email, password, expired):
    """Run the whole apartment-search pipeline for one account.

    Authenticates, downloads alerts and apartments, cleans and enriches the
    data, updates the history, and writes the CSV / XLSX / history files. If
    the module-level ``upload`` flag is truthy the apartments table is also
    pushed to the configured spreadsheet.

    Args:
        email: Account e-mail passed to ``authenticate``.
        password: Account password passed to ``authenticate``.
        expired: When truthy, the history is updated with the expired
            offers and they are removed through ``remove_expired``.

    Terminates the interpreter via ``quit()`` when authentication fails.
    """
    s, headers = authenticate(email, password)

    if s is None:
        logger.critical('Aborting search, check your credentials.')
        quit()
    df_alerts = get_alerts(s, headers)
    df_apparts, expired_index = get_all_apparts(df_alerts, s, headers)
    df_apparts = cleaner(df_apparts)
    df_apparts = features_engineering(df_apparts)
    df_history = append_history_df(df_apparts, HISTORY_PATH)
    # Keep a single row per index value.
    df_apparts = df_apparts.loc[~df_apparts.index.duplicated()]
    df_apparts = get_all_links(s, df_apparts, expired, APPARTS_DB_PATH)
    if expired:
        df_history = update_history_df(df_apparts, df_history, expired_index)
        df_apparts = remove_expired(s, df_apparts, LAST_DELETED_PATH)
    df_apparts.to_csv(APPARTS_CSV_PATH, sep=';', encoding='utf-8')
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            df_apparts.to_excel(APPARTS_XLSX_PATH, encoding='utf-8')
        except IllegalCharacterError:
            # The spreadsheet writer rejects some control characters: strip
            # them from every string cell and write the file again.
            logger.warning(
                "Some illegal characters were replaced in the dataframe.")
            ILLEGAL_CHARACTERS_RE = re.compile(
                r'[\000-\010]|[\013-\014]|[\016-\037]')
            df_apparts.applymap(lambda x: ILLEGAL_CHARACTERS_RE.sub(r'', x)
                                if isinstance(x, str) else x).to_excel(
                                    APPARTS_XLSX_PATH, encoding='utf-8')

    df_history.to_csv(HISTORY_PATH, sep=';', encoding='utf-8')

    # NOTE(review): ``upload`` is not a parameter; it is presumably a
    # module-level flag defined elsewhere in this file -- confirm.
    if upload:
        uploader = Uploader(credentials_path=CREDS_PATH,
                            token_file_path=TOKEN_FILE_PATH,
                            secret_client_path=SECRET_CLIENT_PATH)
        uploader.push_table(
            df_apparts,
            spreadsheet_id='131UoWqQwZfydMJ3yqVe-L6TY6NKtJx8zVNppo034dT4',
            worksheet_name='apparts',
            index=True)
コード例 #18
0
    def process_failure(self, failure, message_id, sign_method, sign_params,
                        request_counter):
        """Report a failed request to the peer and release its counter slot.

        Failures whose value is not a ``ServiceException`` are considered
        unhandled and are logged as critical. When ``message_id`` is not
        ``None`` a JSON error is written back, using the ``code`` attribute
        of the failure value (``-1`` if absent) and, only if the ``debug``
        environment variable is set, a brief traceback. The request counter
        is always decreased.
        """
        error = failure.value
        # Every handled error derives from ServiceException; anything else
        # is an unexpected error worth logging.
        if not isinstance(error, ServiceException):
            logger.critical(failure)

        code = getattr(error, "code", -1)

        # Notifications (no message id) get no error response at all.
        if message_id is not None:
            tb = failure.getBriefTraceback() if os.environ.get("debug") else None
            self.writeJsonError(code, failure.getErrorMessage(), tb,
                                message_id, False, sign_method, sign_params)

        request_counter.decrease()
コード例 #19
0
ファイル: first.py プロジェクト: prudecki/netmiko-gitlab-p3
def push_standard_config():
    """Download the standard configuration from GitLab and push it to devices.

    The configuration file is fetched from the GitLab API, split into lines
    and sent with ``send_config_set`` to every connection returned by
    ``devices_connect(devices=devices_ip_strip)``.

    Returns:
        ``True`` when the configuration was applied on every connection.
        ``False`` when GitLab could not be reached, answered with a status
        other than 200, no connection was available, or sending raised.

    Raises:
        ConfigInputException: If a device's output contains ``^``, which is
            treated as the device rejecting a command.
    """
    # NOTE(review): the access token is hard-coded in source -- consider
    # moving it to configuration.
    logger.debug('trying to connect to GITLAB')
    try:
        git_req = requests.get('http://172.17.0.3/api/v4/projects/1/repository/files/start/raw?ref=master', headers={'PRIVATE-TOKEN': 'x6sP6xf57gb5sxxiXutq'})
    except requests.exceptions.RequestException as req_error:
        logger.critical(f'unable to connect to GITLAB:\n{req_error}')
        return False
    logger.debug('checking if connection was successful')
    if git_req.status_code != 200:
        logger.info('Problem with downloading the template')
        return False
    logger.debug('connection to GITLAB successful')
    # ``str.splitlines`` always returns a list, so no type check is needed.
    out = git_req.text.splitlines()
    logger.debug(f'about to send this configuration to device:\n{out}')
    logger.debug(f'connecting to devices:\n{devices_ip}')
    # The connections must come from the return value of devices_connect;
    # a falsy result (including None) means there is nothing to configure.
    connections = devices_connect(devices=devices_ip_strip)
    if not connections:
        logger.critical('No device connections available to push the configuration to')
        return False
    for connection in connections:
        try:
            sendconf = connection.send_config_set(out)
        except Exception as sendconf_error:
            logger.critical(sendconf_error)
            return False
        # A caret in the device output marks a rejected command.
        if re.search(r'\^', sendconf) is not None:
            conf_apply_error = ConfigInputException('Error in some command! Check log')
            logger.critical(f'{conf_apply_error} output from device:\n{sendconf}')
            raise conf_apply_error
        conf_success = 'Configuration applied successfully'
        logger.info(conf_success + f'output from device:\n{sendconf}')
    return True
コード例 #20
0
 def push_template(self):
     """Send ``self.template`` to the device behind ``self.connect``.

     Returns:
         ``True`` when the configuration was sent and the device output
         contains no ``^``; ``False`` when sending raised an exception.

     Raises:
         AttributeError: If ``self.template`` is not a list.
         ConfigInputException: If the device output contains ``^``, which is
             treated as the device rejecting a command.
     """
     if not isinstance(self.template, list):
         list_error = AttributeError('Problem with converting template to list')
         logger.critical(list_error)
         raise list_error

     logger.debug(f'about to send this configuration to device:\n{self.template}')
     logger.debug(f'connecting to device:\n{self.ip}')
     try:
         sendconf = self.connect.send_config_set(self.template)
     except Exception as sendconf_error:
         logger.critical(sendconf_error)
         return False
     # Raw string: ``'\^'`` in a normal literal is an invalid escape sequence.
     if re.search(r'\^', sendconf) is not None:
         conf_apply_error = ConfigInputException('Error in some command! Check log')
         logger.critical(f'{conf_apply_error} output from device:\n{sendconf}')
         raise conf_apply_error
     conf_success = 'Configuration applied successfully'
     logger.info(conf_success + f'output from device:\n{sendconf}')
     return True
コード例 #21
0
async def print_loop() -> None:
    """Main loop: periodically print new entries until the client closes.

    After the client is ready, the bot verifies that it is a member of
    exactly one guild (otherwise it logs out and exits with status 1),
    looks up the ``feed`` and ``nsfw-feed`` channels, opens the old
    database, starts ``export_loop`` as a background task and then
    alternates between ``print_new_embeds()`` and sleeping for
    ``client.period``.
    """
    await client.wait_until_ready()

    # setup state shared through the client object
    servers = list(client.guilds)
    if len(servers) != 1:
        logger.critical("This bot should only be used on one server")
        await client.logout()
        sys.exit(1)

    server_channels = servers[0].channels
    client.feed_channel = get(server_channels, name="feed")
    client.nsfw_feed_channel = get(server_channels, name="nsfw-feed")
    # A missing channel is reported but does not stop the bot.
    if client.feed_channel is None:
        logger.critical("Couldn't find the 'feed' channel")
    if client.nsfw_feed_channel is None:
        logger.critical("Couldn't find the 'nsfw-feed' channel")

    client.old_db = OldDatabase(filepath=old_db_file)
    client.loop.create_task(export_loop())

    while True:
        if client.is_closed():
            break
        # print whatever new entries exist, then wait for the next round
        await print_new_embeds()
        logger.debug(f"Sleeping for {client.period}")
        await sleep(client.period)
コード例 #22
0
ファイル: main.py プロジェクト: slnsw/dxlab-subplot
from pathlib import Path
from collections import defaultdict, namedtuple

from logzero import logger
from PyTexturePacker import Packer
from PyTexturePacker.MaxRectsPacker.MaxRectsPacker import MaxRectsPacker
from pymongo import MongoClient
from settings import settings


# Optional command-line tools, imported through ``sh``. When a tool (or
# ``sh`` itself) is unavailable the name is bound to a stand-in that accepts
# any arguments and only logs a critical hint.
# NOTE(review): if the first import fails, ``ErrorReturnCode`` stays
# undefined -- check any later use of that name.
try:
    from sh import crunch, ErrorReturnCode
except ImportError:
    def crunch(*args, **kwargs):
        """Stand-in used when the ``crunch`` command cannot be imported."""
        return logger.critical(
            "Please install crunch for PNG optimization"
        )

try:
    from sh import cwebp
except ImportError:
    def cwebp(*args, **kwargs):
        """Stand-in used when the ``cwebp`` command cannot be imported."""
        return logger.critical(
            "Please install cwebp for WEBP compression"
        )


# Matches each run of decimal digits in a string.
RE_DIM = re.compile(r"\d+")

# Lightweight geometry records.
Rect = namedtuple("Rect", "x y w h")
Dimensions = namedtuple("Dimensions", "w h")
Point = namedtuple("Point", "x y")
コード例 #23
0
ファイル: log_sample1.py プロジェクト: nanigasi-san/nanigasi
from logzero import logger

# Emit one message per log level, from most to least severe; each message
# text is simply the name of its level.
logger.critical('critical')
logger.error('error')
logger.warning('warning')
logger.info('info')
logger.debug('debug')
コード例 #24
0
    logzero.formatter(logzero.LogFormatter(datefmt="%Y-%m-%d %H:%M:%S"))

    if args["--log"]:
        logzero.logfile(
            args["--log"],
            encoding="utf-8",
            formatter=logzero.LogFormatter(datefmt="%Y-%m-%d %H:%M:%S",
                                           color=False),
        )

    if args["--format"] == "grobid":
        parse_file = parse_tei_file
    elif args["--format"] == "parscit":
        parse_file = parse_parscit
    else:
        log.critical(f"Unknown --format: {args['--format']}")
        exit(1)

    cited_years = {}
    for dirname in args["<dir>"]:
        if not os.path.exists(dirname):
            log.error(f"Directory not found: {dirname}")
            continue
        dir_diff, dir_files, total_files = 0, 0, 0
        for filename in glob(f"{dirname}/*.xml"):
            base = os.path.basename(filename)
            file_id = base.split(".")[0]
            if file_id.endswith("-parscit"):
                file_id = file_id[:-8]
            log.debug(f"Parsing {base}")
            cited_years[file_id], diff = parse_file(filename)
コード例 #25
0
ファイル: generator.py プロジェクト: Phantasus/library-opac
# Apply the log level requested on the command line.
logzero.loglevel(loglevelFromCli)

# Switch to JSON log output when the CLI flag says so.
if jsonLogFromCli in ("Y", "YES"):
    logzero.json()

log.debug("Command Line Parameters: {0}".format(args))

# Read the YAML configuration; without it the program cannot continue.
configFile = "config.yml"
try:
    with open(configFile, "r") as configString:
        config = yaml.load(configString, Loader=yaml.FullLoader)

except FileNotFoundError:
    log.critical("Can't read {0}!".format(configFile))
    exit(1)

# Report the settings that drive the generation.
log.info("Library Name: {0}".format(config["libraryName"]))
log.info("Locales to generate: {0}".format(config["languages"]))
log.info("Default locale: {0}".format(config["defaultLanguage"]))
log.info("Timezone: {0}".format(config["timezone"]))

# Inventory source: the CLI value when given, otherwise the default path.
sourceFile = args["source"] or "~/library-media-inventory/inventory.csv"
log.info("Source file: {0}".format(sourceFile))

## Some variables, init jinja2
# Directory that contains this script (symlinks resolved).
workDir = os.path.dirname(os.path.realpath(__file__))
コード例 #26
0
def main(args):
    """Extract plain text from one WebVTT file or every ``.vtt`` file in a directory.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``verbose`` (a value > 0 enables DEBUG logging), ``path`` (the
            input file or directory) and ``output_dir`` (optional target
            directory, created when missing). When ``path`` is a directory
            without a trailing slash, one is appended to ``args.path``.

    Returns:
        None. An invalid ``path`` is logged as critical and nothing else
        is done.
    """
    # set log level
    logzero.loglevel(logging.DEBUG if args.verbose > 0 else logging.INFO)

    # are we dealing with a file or a directory?
    if os.path.isdir(args.path):
        mode = "DIR"
        if not args.path.endswith('/'):
            args.path += '/'
    elif os.path.isfile(args.path):
        mode = "FILE"
    else:
        logger.critical("Invalid file or directory path specified.")
        # Nothing sensible can be done without a valid input, so stop here
        # instead of falling through (and possibly creating output dirs).
        return

    # Work out where the results go:
    #  - an explicit output_dir wins and is created as required
    #  - otherwise, in directory mode, results land next to the inputs
    #  - otherwise (file mode) the input path itself is reused further down
    output_dir = ""
    if args.output_dir:
        output_dir = args.output_dir
        if not output_dir.endswith('/'):
            output_dir += '/'

        # create any dirs we need to; exist_ok guards against another
        # process creating the directory between the check and the call
        target_dir = os.path.dirname(output_dir)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)
    elif mode == "DIR":
        # args.path already carries its trailing slash (see above)
        output_dir = args.path

    if mode == "DIR":
        # directory so let's loop over and do 'em all
        logger.debug("Directory mode, scanning: " + args.path)

        for filename in os.listdir(args.path):
            full_path = output_dir + filename
            if not filename.endswith(".vtt"):
                logger.debug("Skipping file: " + full_path)
                continue

            logger.debug("Attempting to extract from file: " + filename)
            plaintext = extract_plaintext_from_webvtt(args.path + filename)
            output_string_to_file(plaintext, full_path, 'txt')

    else:
        # single file specified so just the one extraction to do
        logger.debug("Single file mode, scanning: " + args.path)

        if args.output_dir:
            # output_dir was given, so keep only the file name of the input
            output_path = output_dir + os.path.basename(args.path)
        else:
            # write next to the input, reusing its path
            output_path = args.path

        try:
            # Opening the file first surfaces an unreadable input as IOError.
            with open(args.path):
                plaintext = extract_plaintext_from_webvtt(args.path)
                output_string_to_file(plaintext, output_path, 'txt')

        except IOError:
            logger.error("Input file not accessible, please check the path: " +
                         args.path)
コード例 #27
0
    def _initialize_bucket_structure(self):
        """
        Create the default bucket and verify the storage endpoint by
        round-tripping a test file.

        A random binary file of roughly 5 MB is written into
        ``self.experiment_dir`` and uploaded to ``DEFAULT_BUCKET_NAME``.
        The upload is attempted up to ``MAX_RETRIES`` times. Afterwards the
        file is downloaded again; a failing download is only logged as a
        warning.

        Raises:
            SystemExit: if every upload attempt fails with
                ``ConnectionError``.
        """
        import sys

        storage_creator = StorageCreator(self.storage_config)
        storage_object = storage_creator.build_storage_object()

        # Dump some test data
        FILE_TEST_SIZE = int(5e6)  # Approx 5MB
        MAX_RETRIES = 2
        file_path = os.path.join(self.experiment_dir,
                                 "initialization-service-test")
        generate_big_random_bin_file(file_path, FILE_TEST_SIZE)

        for attempt in range(1, MAX_RETRIES + 1):
            # Perform Storage Connection Test
            logger.info("####### Testing Connection to Storage Endpoint ....")
            storage_object.create_bucket(DEFAULT_BUCKET_NAME)
            try:
                storage_interfacer = JobStorageInterface(
                    storage_obj=storage_object)
                storage_interfacer.put_job_data(
                    bucket=DEFAULT_BUCKET_NAME,
                    username=self.username,
                    project_id=self.project_name,
                    experiment_id=self.experiment_id,
                    variant="Initialization Service",
                    job_id="inital_testing",
                    local_path=file_path)
                logger.info("File Upload to Storage Endpoint Succeeded....")
                break
            except ConnectionError as e:
                logger.error(
                    "File Upload failed, please check to storage config ....\n"
                    f"{e}")

                if attempt == MAX_RETRIES:
                    logger.critical("\nProblem connecting to Storage. Exiting")
                    # sys.exit takes its argument positionally only; a string
                    # is printed to stderr and yields exit status 1.
                    sys.exit("Problem connecting to Storage. Exiting")

                logger.error(
                    f"Trying to reconnect. {MAX_RETRIES - attempt} remaining"
                )

        # Fetch the uploaded file back under a different local name.
        file_path = os.path.join(self.experiment_dir,
                                 "initialization-service-test_restore")
        try:
            storage_interfacer.get_job_data(bucket=DEFAULT_BUCKET_NAME,
                                            username=self.username,
                                            project_id=self.project_name,
                                            experiment_id=self.experiment_id,
                                            variant="Initialization Service",
                                            job_id="inital_testing",
                                            local_path=file_path)
            logger.info("File Download from Storage Endpoint Succeeded....")
        except ConnectionError as e:
            logger.warning(
                "File Download failed, connection might be unstable ....\n"
                f"{e}")
コード例 #28
0
    def one(self, crawled_hotel, collection_name):
        """Match one crawled hotel and record its relationships.

        The hotel's fields are read from ``crawled_hotel["en"]`` when that
        key is present, otherwise from ``crawled_hotel`` itself. The country
        is resolved through ``_find_country_by_en_name`` or
        ``_find_country_by_cn_name`` respectively, with
        ``_find_country_by_partial_name`` as a fallback.

        Args:
            crawled_hotel: Mapping describing the hotel; ``"_id"`` is used in
                log messages and when setting relationships.
            collection_name: Passed through to ``_set_relationships``.

        Returns:
            ``True`` once every match has been handed to
            ``_set_relationships``; ``None`` when a required field is
            missing, the country or destination cannot be resolved, or the
            matching step fails.
        """
        use_en = "en" in crawled_hotel
        if use_en:
            fields = crawled_hotel["en"]
            required = (
                ("country", "Country is missing. ObjectId(%s)"),
                ("city", "City is missing. ObjectId(%s)"),
                ("name", "Name is missing. ObjectId(%s)"),
                ("address", "Address is missing. ObjectId(%s)"),
                ("latitude", "Latitude is missing. ObjectId(%s)"),
                ("longitude", "Longitude is missing. ObjectId(%s)"),
            )
        else:
            fields = crawled_hotel
            required = (
                ("country", "Country is missing. ObjectId(%s)"),
                ("city", "City is missing. ObjectId(%s)"),
                ("name", "Name is missing. ObjectId(%s)"),
                ("address", "Address is missing. ObjectId(%s)"),
                ("latitude", "Latitude is missing. ObjectId(%s)"),
                ("longitude", "longitude is missing. ObjectId(%s)"),
            )

        # Collect the required fields in order, bailing out on the first gap.
        collected = []
        for key, missing_message in required:
            if key not in fields:
                logger.error(missing_message, crawled_hotel["_id"])
                return None
            collected.append(fields[key])
        country_name, city, name, address, latitude, longitude = collected

        if use_en:
            country = self._find_country_by_en_name(country_name)
        else:
            country = self._find_country_by_cn_name(country_name)
        # Fall back to a partial-name lookup when the exact one found nothing.
        country = country or self._find_country_by_partial_name(country_name)
        if not country:
            logger.error('Country("%s") of Hotel(%s) not found.', country_name,
                         name)
            return None

        try:
            destination = self._destination_matching(
                country_code=country["code_cca2"],
                latitude=latitude,
                longitude=longitude,
                address=address,
            )
        except Exception as e:
            logger.exception(e)
            return None
        if not destination:
            logger.critical(
                "Bad destination of Hotel(%s) at "
                "Country(%s) with City(%s)",
                name,
                country_name,
                city,
            )
            return None

        try:
            # The destination lookups stay inside the try so that a failure
            # there is reported the same way as a failure of the matcher.
            criteria = {
                "name": name,
                "address": address,
                "longitude": longitude,
                "latitude": latitude,
                "phone": None,
                "wg_destination_id": destination.get("destination_id"),
                "wg_city_id": destination.get("city_id"),
                "wg_province_id": destination.get("province_id"),
                "wg_country_id": destination.get("country_id"),
            }
            matches = self._hotel_matching(**criteria)
        except Empty:
            logger.critical(
                "Similarities of Hotel(%s) at Destination(%s) is empty.",
                name,
                destination,
            )
            return None
        except Exception as e:
            logger.critical(
                "Falied to got matched hotels. "
                "Hotel('%s'), Destination(%s)",
                name,
                destination,
                exc_info=e,
            )
            return None

        for match in matches:
            outcome = self._set_relationships(
                match["provider"],
                match["oid"],
                collection_name,
                str(crawled_hotel["_id"]),
            )
            logger.info(outcome)
        return True