Example #1
 def rebuild_db(self):
     start_time = datetime.datetime.now()
     self.database_clean(set_state=False)
     Gphoto.drop_collection()
     self.walk(folder=self.root)
     self.database_clean(set_state=True)
     logger.info(f"Full resync elapsed time: {datetime.datetime.now() - start_time}")
Example #2
 def update_db(self):
     drive_changes = self.validate_drive_changes()
     if not drive_changes:
         logger.info("No changes to photos detected")
         return
     delete_count = new_count = 0
     self.database_clean(set_state=False)
     for change in drive_changes:
         if change.removed:
             Gphoto.objects(gid=change.fileId).delete()
             logger.info(
                 f"Removing record for file ID {change.fileId} from database if it exists."
             )
             delete_count += 1
         else:
             try:
                 Gphoto.objects(gid=change.gphoto.gid).get()
             except me.DoesNotExist:
                 logger.info(f"Updating record {change.gphoto.name}")
                 change.gphoto.save(force_insert=False)
                 new_count += 1
                 continue
             else:
                 logger.info(f"Update skipped; Google ID already in database")
     self.database_clean(set_state=True)
     logger.info(
         f"Sync update complete. New file count: {new_count} Deleted file count: {delete_count}"
     )
Example #3
 def sync(self):
     if self.database_clean() and self.start_token() is not None:
         self.update_db()
     else:
         logger.info("Database dirty: Rebulding")
         self.rebuild_db()
     self.start_token(update=True)
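Examples #1-#3 fit together: sync() picks a full resync (rebuild_db, Example #1) or an incremental update (update_db, Example #2) depending on the database-clean flag and the saved start token, then refreshes the token. A hypothetical driver loop, purely as a sketch; the class name GphotoSync and the cfg fields are assumptions, not taken from the project:

import time

gphotos = GphotoSync()  # assumed name of the class that owns rebuild_db/update_db/sync
while True:
    gphotos.sync()  # full rebuild or incremental update, as decided above
    time.sleep(cfg.gphoto.sync_interval)  # assumed config value for the polling interval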
Example #4
File: app.py Project: labroid/PhotoManager
 def check_gphotos_membership(self):
     self.status('Checking for photos not in Gphotos')
     for photo in Queue.objects(me.Q(md5sum__ne=None) & me.Q(in_gphotos=False)):
         match = Gphoto.objects(md5Checksum=photo.md5sum).first()
         if match:
             photo.gphotos_path = match.path
             photo.gid = match.gid
             photo.in_gphotos = True
             photo.original_filename = match.originalFilename
         else:
             photo.in_gphotos = False
         photo.save()
     logger.info("Check Gphotos enqueue done")
Example #5
 def walk(self, folder, path=None):
     path = path or []
     folders = []
     db_nodes = []
     path.append(folder.name)
     logger.info(f"Path: {path}")
     for node in self.get_nodes(folder):
         node.path = path
         if node.mimeType == FOLDER:
             folders.append(node)
         db_nodes.append(node)
     if db_nodes:
         Gphoto.objects.insert(db_nodes)
     for folder in folders:
         self.walk(folder, path)
     path.pop()
Example #6
    def get_google_drive_changes(self):
        """
        Google API for changes().list() returns:
        {
            "kind": "drive#changeList",
            "nextPageToken": string,
            "newStartPageToken": string,
            "changes": [
                changes Resource
            ]
        }

        where a changes Resource is:

        {
            "kind": "drive#change",
            "type": string,
            "time": datetime,
            "removed": boolean,
            "fileId": string,
            "file": files Resource,
            "teamDriveId": string,
            "teamDrive": teamdrives Resource
        }

        """
        change_token = Gphoto_state.objects().get()["start_token"]
        changes = []
        while True:
            response = (
                service.changes()
                .list(
                    pageToken=change_token,
                    pageSize=1000,
                    includeRemoved=True,
                    fields=UPDATE_FIELDS,
                )
                .execute()
            )
            logger.info(
                f"Google sent {len(response.get('changes', []))} change records"
            )
            changes += response["changes"]
            change_token = response.get("nextPageToken")
            if change_token is None:
                break
        return changes
Example #7
def _upload_binary_media(filepath, filename):
    creds = get_credentials()
    with open(filepath, "rb") as photo_fp:
        binary_file = photo_fp.read()
    url = r"https://photoslibrary.googleapis.com/v1/uploads"
    headers = {
        "Content-type": "application/octet-stream",
        "Authorization": f"Bearer {creds.token}",
        "X-Goog-Upload-File-Name": f"{filename}",
        "X-Goog-Upload-Protocol": "raw",
    }
    r = requests.post(url, headers=headers, data=binary_file)
    if r.ok:
        log_status = "Upload successful"
    else:
        log_status = "Upload failed"
    logger.info(f"{log_status}: Upload elapsed time: {r.elapsed}")
    return r
Example #8
def _insert_new_photo(token):
    creds = get_credentials()
    headers = {"Authorization": f"Bearer {creds.token}"}
    insert_new_media_item = {
        "newMediaItems": [{"simpleMediaItem": {"uploadToken": token}}]
    }
    url = r"https://photoslibrary.googleapis.com/v1/mediaItems:batchCreate"
    r = requests.post(url=url, headers=headers, data=json.dumps(insert_new_media_item))
    response = r.json()
    status = response["newMediaItemResults"][0]["status"]["message"]
    if status != "OK":
        logger.info(f"NewMediaItem insertion failed. {pformat(response)}")
        success = False
    else:
        logger.info(f"Insertion successful. {pformat(response)}")
        success = True
    elapsed = r.elapsed.microseconds/1_000_000
    print(f"Media insertion elapsed time: {r.elapsed.microseconds/1_000_000} seconds")
    return success, elapsed
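Examples #7 and #8 are the two halves of the Google Photos upload flow: push the raw bytes to the uploads endpoint, then register the resulting upload token via mediaItems:batchCreate. A minimal sketch of chaining the two helpers, assuming the uploads endpoint returns the upload token as the plain response body:

def upload_photo(filepath, filename):
    # Hypothetical glue function, not part of the original module.
    r = _upload_binary_media(filepath, filename)
    if not r.ok:
        return False
    upload_token = r.text  # assumption: the raw response body is the upload token
    success, _elapsed = _insert_new_photo(upload_token)
    return success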
Example #9
 def get_nodes(self, parent):
     cumulative = 0
     nodes = []
     nextpagetoken = None
     query = f"'{parent.gid}' in parents and (mimeType contains 'image/' or mimeType contains 'video/' or mimeType = 'application/vnd.google-apps.folder') and trashed = false"
     while True:
         start_time = datetime.datetime.now()
         response = (
             service.files()
             .list(
                 q=query, pageSize=1000, pageToken=nextpagetoken, fields=INIT_FIELDS
             )
             .execute()
         )
         elapsed = datetime.datetime.now() - start_time
         count = len(response["files"])
         cumulative += count
         logger.info(f"{elapsed} Drive delivered {count} files. Total: {cumulative}")
         sterile_nodes = [self.steralize(x) for x in response["files"]]
         nodes += [Gphoto(**x) for x in sterile_nodes]
         nextpagetoken = response.get("nextPageToken")
         if nextpagetoken is None:
             return nodes
Example #10
File: app.py Project: labroid/PhotoManager
 def mirror_file(self, photo):
     self.status("Mirroring files")
     dest = Path(cfg.local.mirror_root, *photo.gphotos_path, photo.original_filename)
     if not dest.is_file():
         self.copy_file(photo=photo, dest=dest)
         logger.info(f"Mirrored {photo.src_path} to {dest}")
     else:
         if file_md5sum(dest) == file_md5sum(photo.src_path):
             self.copy_file(photo=photo, dest=None)
             logger.info(f"Already mirrored: {photo.src_path}")
         else:
             name = Path(photo.original_filename)
             new_filename = name.stem + photo.gid[-4:] + name.suffix
             dest = dest.parent / new_filename
             self.copy_file(photo=photo, dest=dest)
             logger.info(f"Mirrored {photo.src_path} to {dest}")
     photo.update(mirrored=True)
Example #11
File: app.py Project: labroid/PhotoManager
    def add_candidates(self):
        self.state.reload()
        if self.state.target == self.state.old_target:
            return
        self.state.modify(old_target=self.state.target)
        message = 'Walking target directories...'
        logger.info(message)
        self.status(message)
        dirsize = 0

        start = datetime.datetime.now()
        self.state.modify(dirlist=list(glob.iglob(self.state.target)))
        logger.info(f"Target list: {self.state.dirlist}")
        for top in self.state.dirlist:
            message = f'Traversing tree at {top} and adding to queue.'
            logger.info(message)
            self.status(message)
            top_path = Path(top)
            for path in top_path.rglob("**/*"):
                ext = path.suffix.lower()
                if ext in cfg.local.image_filetypes:
                    size = path.stat().st_size
                    dirsize += size
                    Queue(src_path=str(path), size=size).save()
                else:
                    ext = ext.replace(
                        ".", ""
                    )  # Database can't handle keys starting with dot
                    excluded = self.state.excluded_ext_dict
                    if ext in excluded:
                        excluded[ext] += 1
                    else:
                        excluded[ext] = 1
                    self.state.update(excluded_ext_dict=excluded)
        self.state.save()
        elapsed = datetime.datetime.now() - start
        self.state.modify(
            dirsize=self.state.dirsize + dirsize,
            dirtime=elapsed.seconds + elapsed.microseconds / 1e6,
        )
        return
Example #12
    train_examples, val_examples = load_train_val_examples(args)
    trainer.train(args, train_examples, val_examples)

    def do_eval(args):
        args.model_path = args.best_model_path
        eval_examples = load_eval_examples(args.eval_file)
        model = load_model(args)
        trainer.evaluate(args, model, eval_examples)

    def do_predict(args):
        args.model_path = args.best_model_path
        test_examples = load_test_examples(args)
        model = load_model(args)
        trainer.predict(args, model, test_examples)

    #do_predict(args)
    #print(trainer.pred_results)


if __name__ == '__main__':

    def add_special_args(parser):
        parser.add_argument("--generate_submission",
                            action="store_true",
                            help="")
        return parser

    args = get_args(experiment_params=experiment_params,
                    special_args=[add_special_args])
    logger.info(f"args: {args}")
    main(args)
Example #13
logger.debug("That's it, beautiful and simple logging!")
logger.add("file_{time}.log")

logger.add("file_1.log",
           rotation="500 MB")  # Automatically rotate too big file
logger.add("file_2.log",
           rotation="12:00")  # New file is created each day at noon
logger.add("file_3.log",
           rotation="1 week")  # Once the file is too old, it's rotated

logger.add("file_X.log", retention="10 days")  # Cleanup after some time

logger.add("file_Y.log", compression="zip")  # Save some loved space

logger.info("testing...")
"""Modern string formatting using braces style"""

logger.info("If you're using Python {}, prefer {feature} of course!",
            3.6,
            feature="f-strings")
"""Exceptions catching within threads or main"""


@logger.catch
def my_function(x, y, z):
    # An error? It's caught anyway!
    return 1 / (x + y + z)


my_function(0, 0, 0)
Example #14
File: mbank.py Project: pdulak/konta-kt
def load_csv():
    # load all csv files from temp dir
    source_dir = os.path.join(settings.BASE_DIR, 'temp')
    field_names = [
        'Date', 'Added', 'Type', 'Description', 'Party Name', 'Party IBAN',
        'Amount', 'Balance', 'Account Number'
    ]
    df = pd.DataFrame()

    list_of_files = glob.glob(os.path.join(source_dir, '*.csv'))
    for this_file in list_of_files:
        logger.info("adding {}".format(this_file))
        this_account_number = ''
        this_data = ''
        save_account_number = False
        save_transactions = False
        with open(this_file, encoding='cp1250') as f:
            for line in f:
                if line[:15] == '#Numer rachunku':  # Polish: "Account number" header
                    save_account_number = True
                elif line[:14] == '#Data operacji':  # Polish: "Transaction date" header
                    save_transactions = True
                elif save_account_number:
                    this_account_number = line[:32].replace(' ', '')
                    save_account_number = False
                elif save_transactions:
                    if line[0] == '2':
                        this_data += line.replace("'", '"').replace('";"', "|;|").\
                            replace('";', "|;").replace(';"', ";|")
                    else:
                        save_transactions = False

        df_temp = pd.read_csv(StringIO(this_data),
                              sep=';',
                              comment='#',
                              engine='python',
                              names=field_names,
                              quotechar='|',
                              encoding='cp1250',
                              dtype={'Party IBAN': 'str'})

        df_temp['Account Number'] = this_account_number
        df_temp['Source'] = os.path.basename(this_file)
        df_temp['Description'] = df_temp['Description'].fillna('')
        df_temp['Party IBAN'] = df_temp['Party IBAN'].fillna('')
        df_temp['Party Name'] = df_temp['Party Name'].fillna('')
        df_temp['Type'] = df_temp['Type'].fillna('')

        df_temp['Date Modified'] = df_temp['Description'].map(
            extract_date_from_description)
        df_temp['Description'] = df_temp['Description'].map(trim_text_fields)
        df_temp['Party Name'] = df_temp['Party Name'].map(trim_text_fields)
        df_temp['Type'] = df_temp['Type'].map(trim_text_fields)

        df_temp['Date Modified'] = df_temp['Date Modified'].fillna(
            df_temp['Date'])
        df_temp['Amount Modified'] = df_temp['Amount'].map(
            lambda x: float(re.sub(r'[^0-9,.-]', '', x).replace(',', '.')))
        df_temp['Balance Modified'] = df_temp['Balance'].map(
            lambda x: float(re.sub(r'[^0-9,.-]', '', x).replace(',', '.')))

        df = pd.concat([df, df_temp], ignore_index=True)  # DataFrame.append was removed in pandas 2.x
        logger.info("added {}".format(df_temp.shape))

    logger.info("whole DF shape: {}".format(df.shape))
    logger.info(df.info())

    return df
Example #15
 def _send(self, data):
     logger.info(f'Sending {data} to {self.channel_name}')
     self.client.publish(self.channel_name, data)
     self.client.loop_write()
Example #16
 async def post(self, url, data):
     logger.info(f'call {url}')
     result = await self.session.post(url, data=data)
     result_json = await result.json()
     return result_json
Example #17
 async def wrapper(ctx, *args, **kwargs):
     logger.info(
         f"/{f.__name__} command trigger by user. user={ctx.message.author!r}"
     )
     await f(ctx, *args, **kwargs)
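The wrapper above only makes sense as the inner function of a decorator that closes over f. A minimal sketch of such a decorator for a discord.py-style command handler; the name log_command is an assumption, and logger is the same loguru logger used throughout:

import functools

def log_command(f):
    # Hypothetical decorator factory around the wrapper shown above.
    @functools.wraps(f)
    async def wrapper(ctx, *args, **kwargs):
        logger.info(
            f"/{f.__name__} command triggered by user. user={ctx.message.author!r}"
        )
        await f(ctx, *args, **kwargs)
    return wrapper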
Example #18
def rotation_reopening(tmpdir, monkeypatch_date, delay):
    monkeypatch_date(2018, 10, 27, 5, 0, 0, 0)
    filepath = tmpdir.join("test.log")
    i = logger.add(str(filepath), format="{message}", delay=delay, rotation="2 h")
    logger.info("1")
    monkeypatch_date(2018, 10, 27, 6, 30, 0, 0)
    logger.info("2")
    logger.remove(i)
    i = logger.add(str(filepath), format="{message}", delay=delay, rotation="2 h")
    logger.info("3")

    assert len(tmpdir.listdir()) == 1
    assert filepath.read() == "1\n2\n3\n"

    monkeypatch_date(2018, 10, 27, 7, 30, 0, 0)
    logger.info("4")

    assert len(tmpdir.listdir()) == 2
    assert filepath.read() == "4\n"

    logger.remove(i)
    monkeypatch_date(2018, 10, 27, 8, 30, 0, 0)

    i = logger.add(str(filepath), format="{message}", delay=delay, rotation="2 h")
    logger.info("5")

    assert len(tmpdir.listdir()) == 2
    assert filepath.read() == "4\n5\n"

    monkeypatch_date(2018, 10, 27, 10, 0, 0, 0)
    logger.info("6")
    logger.remove(i)

    assert len(tmpdir.listdir()) == 3
    assert filepath.read() == "6\n"
Example #19
    logger.debug(f'msg0 <- {hexlify(msg0)}')
    logger.debug(f'msg1 <- {hexlify(msg1)}')

    msg2 = ra.receive_msg1(msg1)
    logger.debug(f'msg2 -> {hexlify(msg2)}')

    msg3, mk_hash, sk_hash = c.remote_attestation_update(msg2)
    logger.debug(f'msg3 <- {hexlify(msg3)}')

    report, advisory = ra.recv_msg3_verify(msg3)

    v_mk = mk_hash == SHA256.new(ra.mk).digest()
    v_sk = sk_hash == SHA256.new(ra.sk).digest()
    if v_mk and v_sk:
        logger.info(f'SHA256(MK) = {hexlify(mk_hash)}')
        logger.info(f'SHA256(SK) = {hexlify(sk_hash)}')
    else:
        raise Exception("Remote Attestation Failed")

    # Coin tossing

    # coin = int.from_bytes(get_random_bytes(4), 'little')
    coin = 0x123456
    # iv = unhexlify('5016e1d232e7d0f01e15ce610356054c')
    # key = unhexlify('beb4b02dbce8708c1c0d030f40909b61') # ra.sk
    key = ra.sk

    cipher = AES.new(key, AES.MODE_GCM, nonce=get_random_bytes(12))
    ciphertext, tag = cipher.encrypt_and_digest(int.to_bytes(coin, 4, 'little'))
    iv = cipher.nonce
Example #20
 def get_connector_info(connector_name: str) -> dict:
     logger.info("Get connector information for {}", connector_name)
     response = httpx.get(f"{url}/connectors/{connector_name}")
     info: dict = response.json()
     return info
Example #21
def main(args_list=None):
    if args_list is None:
        args_list = sys.argv[1:]
    args = parser.parse_args(args_list)

    s3 = S3Backup(bucket_name=args.s3_bucket)
    screenshotter = Screenshotter(local_dir=args.temp_dir,
                                  s3_backup=s3,
                                  phantomjscloud_key=args.phantomjscloud_key)

    # get states info from API
    url = 'https://covidtracking.com/api/states/info.csv'
    content = requests.get(url).content
    state_info_df = pd.read_csv(io.StringIO(content.decode('utf-8')))

    failed_states = []

    # screenshot state images
    if args.states:
        logger.info(f'Snapshotting states {args.states}')
        states_list = args.states.split(',')
        state_info_df = state_info_df[state_info_df.state.isin(states_list)]

    for idx, r in state_info_df.iterrows():
        state = r["state"]
        data_url = r["covid19Site"]
        secondary_data_url = r["covid19SiteSecondary"]
        tertiary_data_url = r["covid19SiteTertiary"]
        try:
            screenshotter.screenshot(
                state,
                data_url,
                backup_to_s3=args.push_to_s3,
                replace_most_recent_snapshot=args.replace_most_recent_snapshot)
            if not pd.isnull(secondary_data_url):
                screenshotter.screenshot(state,
                                         secondary_data_url,
                                         suffix='secondary',
                                         backup_to_s3=args.push_to_s3,
                                         replace_most_recent_snapshot=args.
                                         replace_most_recent_snapshot)
            if not pd.isnull(tertiary_data_url):
                screenshotter.screenshot(state,
                                         tertiary_data_url,
                                         suffix='tertiary',
                                         backup_to_s3=args.push_to_s3,
                                         replace_most_recent_snapshot=args.
                                         replace_most_recent_snapshot)
        except:
            failed_states.append(state)

    if failed_states:
        logger.error(f"Failed states for this run: {','.join(failed_states)}")
    else:
        logger.info("All required states successfully screenshotted")

    # special-case: screenshot CDC "US Cases" and "US COVID Testing" tabs
    cdc_link = 'https://www.cdc.gov/covid-data-tracker/'
    screenshotter.screenshot('CDC',
                             cdc_link,
                             suffix='testing',
                             backup_to_s3=args.push_to_s3)
    screenshotter.screenshot('CDC',
                             cdc_link,
                             suffix='cases',
                             backup_to_s3=args.push_to_s3)
Example #22
    def save_url_image_to_path(self, state, data_url, path):
        """Saves URL image from data_url to the specified path.

        Parameters
        ----------
        state : str
            Two-letter abbreviation of the state or territory. Used for special-casing sizes, etc.

        data_url : str
            URL of data site to save

        path : str
            Local path to which to save .png screenshot of data_url
        """
        logger.info(f"Retrieving {data_url}")

        # if we're just saving a PDF, don't use phantomjscloud
        if path.endswith('.pdf'):
            logger.info(f"Downloading PDF from {data_url}")
            response = requests.get(data_url)
            if response.status_code == 200:
                with open(path, 'wb') as f:
                    f.write(response.content)
                return
            else:
                logger.error(f'Response status code: {response.status_code}')
                raise ValueError(f'Could not retrieve URL: {data_url}')

        data = {
            'url': data_url,
            'renderType': 'png',
        }

        if state in ['WA', 'TX', 'PA']:
            # wait longer for load
            logger.info(f"waiting 30 sec to load state {state}")
            data['overseerScript'] = """page.manualWait();
                                      await page.waitForDelay(30000);
                                      page.done();"""

        # IA: need to load IFrame separately
        if state == 'IA':
            data['url'] = 'https://public.domo.com/embed/pages/dPRol'

        # add a hover for ID secondary to get more data
        if state == 'ID':
            if 'secondary' in path:
                logger.info(
                    'Custom mouseover logic for ID secondary dashboard')
                data['overseerScript'] = """page.manualWait();
                                          await page.waitForSelector("#tabZoneId10");
                                          await page.hover("#tabZoneId10");
                                          page.done();"""
            else:
                logger.info(f"using larger viewport for state {state}")
                data['renderSettings'] = {
                    'viewport': {
                        'width': 1400,
                        'height': 3000
                    }
                }

        # PhantomJScloud gets the page length wrong for some states, need to set those manually
        if state in ['PA', 'CA', 'IA']:
            logger.info(f"using larger viewport for state {state}")
            data['renderSettings'] = {
                'viewport': {
                    'width': 1400,
                    'height': 3000
                }
            }

        if state == 'NE':
            # needs really huge viewport for some reason
            logger.info(f"using huge viewport for state {state}")
            data['renderSettings'] = {
                'viewport': {
                    'width': 1400,
                    'height': 5000
                }
            }

        if state == 'UT':
            # Utah dashboard doesn't render in phantomjscloud unless I set clipRectangle
            data['renderSettings'] = {
                'clipRectangle': {
                    'width': 1400,
                    'height': 3000
                }
            }

        # Indiana needs a huge viewport and has a popup
        if state == 'IN':
            logger.info(f"using huger viewport for state {state}")
            data['renderSettings'] = {
                'viewport': {
                    'width': 1400,
                    'height': 8500
                }
            }
            # click button to get rid of popup
            data['overseerScript'] = 'page.manualWait(); \
                                      await page.waitForSelector("#prefix-dismissButton"); \
                                      page.click("#prefix-dismissButton"); \
                                      await page.waitForFunction(()=>document.querySelector("#main-content").textContent!==""); \
                                      page.done();'

        # for the CDC testing tab, need to do clicking magic
        if state == 'CDC' and 'testing' in path:
            # try clicking on a tab somewhere there
            logger.info(f"Custom CDC logic")
            data['overseerScript'] = """page.manualWait();
                                      await page.waitForSelector("[data-tabname='tabAllLabs']");
                                      page.click("[data-tabname='tabAllLabs']", {delay: 150});
                                      await page.waitForFunction(()=>document.querySelector("#mainContent_Title").textContent=="United States Laboratory Testing");
                                      await page.waitForDelay(1000);
                                      page.done();"""
        logger.info('Posting request...')
        response = requests.post(self.phantomjs_url, json.dumps(data))
        logger.info('Done.')

        if response.status_code == 200:
            with open(path, 'wb') as f:
                f.write(response.content)
        else:
            logger.error(f'Response status code: {response.status_code}')
            raise ValueError(f'Could not retrieve URL: {data_url}')
Example #23
def load_flightradar_data():
    """
    Load the flightradar flight-number data used to do smart calls to the Skyscanner API.
    This DB can be reconstructed with recompute_airport_database (see end of file).
    """
    path = os.path.join(os.getcwd(), 'scrap_flight_number_mieux.csv')  #
    try:
        logger.info(path)
        flight_data = pd.read_csv(path)
        logger.info('load the flightradar data. Here is a random example :')
        logger.info(flight_data.sample(1))
        return flight_data
    except:
        try:
            logger.info(os.path.join(os.getcwd(), 'api/app/', 'data/scrap_flight_number_mieux.csv'))
            flight_data = pd.read_csv(os.path.join(os.getcwd(), 'api/app/', 'data/scrap_flight_number_mieux.csv'), sep=',')
            logger.info('load the flightradar data. Here is a random example :')
            logger.info(flight_data.sample(1))
            return flight_data
        except:
            logger.info(os.path.join(os.getcwd(), 'app/', 'data/scrap_flight_number_mieux.csv'))
            flight_data = pd.read_csv(os.path.join(os.getcwd(), 'app/', 'data/scrap_flight_number_mieux.csv'))
            logger.info('load the flightradar data. Here is a random example :')
            logger.info(flight_data.sample(1))
            return flight_data
Example #24
def load_airport_database():
    """
    Load the airport database used to do smart calls to the Skyscanner API.
    This DB can be reconstructed with recompute_airport_database (see end of file).
    """
    path = os.path.join(os.getcwd(), 'skyscanner_europe_airport_list.csv')  #
    try:
        logger.info(path)
        airport_list = pd.read_csv(path)
        airport_list['geoloc'] = airport_list.apply(lambda x: [x.latitude, x.longitude], axis=1)
        logger.info('load the skyscanner airport db. Here is a random example :')
        logger.info(airport_list.sample(1))
        return airport_list
    except:
        try:
            logger.info(os.path.join(os.getcwd(), 'api/app/', 'data/skyscanner_europe_airport_list.csv'))
            airport_list = pd.read_csv(os.path.join(os.getcwd(), 'api/app/', 'data/skyscanner_europe_airport_list.csv'), sep=',')
            airport_list['geoloc'] = airport_list.apply(lambda x: [x.latitude, x.longitude], axis=1)
            logger.info('load the skyscanner airport db. Here is a random example :')
            logger.info(airport_list.sample(1))
            return airport_list
        except:
            logger.info(os.path.join(os.getcwd(), 'app/', 'data/skyscanner_europe_airport_list.csv'))
            airport_list = pd.read_csv(os.path.join(os.getcwd(), 'app/', 'data/skyscanner_europe_airport_list.csv'))
            airport_list['geoloc'] = airport_list.apply(lambda x: [x.latitude, x.longitude], axis=1)
            logger.info('load the skyscanner airport db. Here is a random example :')
            logger.info(airport_list.sample(1))
            return airport_list
Example #25
def get_planes_from_skyscanner(date_departure, date_return, departure, arrival, details=False, try_number=1):
    """
    Here we actually call the Skyscanner API with all the relevant information supplied by the user.
    First we create a session with a POST request, then we read the results with a second GET request.
    For each call we try to handle the potential errors the API could return; most importantly,
        we wait and try again if the API says it is too busy right now.
    """
    # format date as yyyy-mm-dd
    date_formated = str(date_departure)[0:10]
    # logger.info(f'get_planes try nb {try_number}')
    one_way = date_return is None

    api_key = tmw_api_keys.SKYSCANNER_API_KEY
    quote_url = 'http://partners.api.skyscanner.net/apiservices/pricing/v1.0'

    headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'}

    data = {'country': 'FR', 'currency': 'EUR', 'locale': 'en-US', 'originplace': departure,
            'destinationplace': arrival, 'outbounddate': date_formated}

    data['apiKey'] = api_key

    # If another call to Skyscanner has already been successful, we don't want to waste too much time
    # so we decrease the max number of retries
    max_retries = 5

    # First POST request to create session
    time_before_call = time.perf_counter()
    response = requests.post(quote_url, headers=headers, data=data)

    # get session key
    # In some cases the API won't give a session key "Location" so we try and except
    try:
        # print(response.headers)
        key = response.headers['Location'].split('/')[-1]
        session_url = response.headers['Location']
    except:
        # Retry calling API 3 times
        try:
            # Let's look at the error from the API
            error = response.json()['ValidationErrors']
            logger.warning(error)
            # When the API says it's too busy and we haven't passed the max number of retries
            if (error[0]['Message'] == 'Rate limit has been exceeded: 400 PerMinute for PricingSession') & (
                    try_number < max_retries):
                time.sleep(1)
                logger.info(f'we try our luck for chance {try_number + 1} out of 3 with Skyscanner')
                # We call this same function again with the try_number increased by one
                return get_planes_from_skyscanner(date_departure, date_return, departure, arrival,
                                                  details=True, try_number=try_number + 1)
            else:
                # we couldn't find any trips through the API so we return an empty DF
                logger.info(f'out because {error}')
                return pd.DataFrame()
        except:
            # If the API said we called too much, we wait a little bit more and try again (YOLO)
            if (response.status_code == 429) & (try_number < max_retries):
                time.sleep(1.5)
                # We call this same function again with the try_number increased by one
                return get_planes_from_skyscanner(date_departure, date_return, departure, arrival,
                                                  details=True, try_number=try_number + 1)

            # Otherwise we return an empty DF
            logger.warning('The Skyscanner API returned an unknown error')
            logger.warning(response.json())
            return pd.DataFrame()

    # Now we construct the 2nd request to extract the results from the session we just created
    url = 'https://skyscanner-skyscanner-flight-search-v1.p.rapidapi.com/apiservices/pricing/uk2/v1.0/' + key
    # We only take the first page, 100 first results
    querystring = {"pageIndex": "0", "pageSize": "100"}
    data['pageIndex'] = 0
    data['pageSize'] = 100
    # response = requests.request("GET", url, headers=headers, params=querystring)
    response = requests.get(session_url, headers=headers, params=data)

    # logger.info(f'status code of get is {response.status_code}')
    # logger.info(response.content)
    if response.status_code == 429:
        if try_number < max_retries:
            time.sleep(1)
            return get_planes_from_skyscanner(date_departure, date_return, departure, arrival,
                                              details=True, try_number=try_number + 1)
        else:
            logger.warning(f'Error {response.status_code} with Skyscanner API')
            return pd.DataFrame()
    try:
        # When the API says it's too busy, or if the response is not complete yet, we send the request again
        # However we don't exceed the max number of retries and we don't wait more than 5 sec
        #   for the response to be completed
        while response.json()['Status'] != 'UpdatesComplete':
            response = requests.get(session_url, headers=headers, params=data)
    except:
        if response.status_code == 200:
            # Should not happen
            logger.info('out because chai po')
            logger.info(response.json()['Status'])
            logger.info(response.json()['Legs'])
            return pd.DataFrame()

    try:
        # When the response actually contains something we call the format function to
        #    regroup all the necessary info
        if len(response.json()['Legs']) > 0:
            # logger.info(f'Skyscanner API call duration {time.perf_counter() - time_before_call}')
            return format_skyscanner_response(response.json(), date_departure, departure, arrival, one_way, details)
        else:
            # The API could not find any trips
            # logger.info('out because no legs. Looked like this though')
            # logger.info(response.status_code)
            # logger.info(response.json())
            # logger.info(f'Skyscanner API call duration {time.perf_counter() - time_before_call}')
            return pd.DataFrame()
    except:
        # Should not happen
        logger.info('bad abd bad')
        logger.info(f'json looks like {response.json()}')
        return pd.DataFrame()
Example #26
    def match(self):
        # runs in separate thread

        try:
            self.h, self.w = self.im.shape
            # degrees per pixel
            dpp = self.binning * (self.pixel_height * 1e-6 /
                                  self.focal_length) * (206265 / 3.6)
            fov = self.h * dpp

            if self.star_source == 'first sub':
                ''' use already extracted keystars and mags from first sub; appears to fail on 
                    dense images
                '''
                aligner = Component.get('Aligner')
                centroids = aligner.keystars
                # do flips, though it would be good to get this working without flips in the future
                view = Component.get('View')
                x = self.w - centroids[:, 0] if view.flip_LR else centroids[:,
                                                                            0]
                y = self.h - centroids[:, 1] if view.flip_UD else centroids[:,
                                                                            1]
                flux = aligner.mags

            elif self.star_source == 'current sub':
                centroids = Component.get(
                    'Stacker').get_centroids_for_platesolving()
                view = Component.get('View')
                x = self.w - centroids[:, 0] if view.flip_LR else centroids[:,
                                                                            0]
                y = self.h - centroids[:, 1] if view.flip_UD else centroids[:,
                                                                            1]
                flux = centroids[:, 2]

            else:
                ''' alternative approach is to reanalyse (possible stacked) image to extract stars, but 
                    field rotation artefacts can cause issues with star extraction
                '''
                star_thresh = 0.001
                blobs = blob_dog(self.im,
                                 min_sigma=3,
                                 max_sigma=5,
                                 threshold=star_thresh,
                                 overlap=0)[:, [1, 0]]
                centroids = star_centroids(self.im, blobs)
                x = centroids[:, 0]
                y = centroids[:, 1]
                flux = centroids[:, 2]

            #  convert flux to relative magnitude
            mags = -2.5 * np.log10(flux / np.max(flux))

            logger.debug(
                'star source {:}; nstars = {:}; mag range {:.1f}'.format(
                    self.star_source, len(x), np.max(mags)))

            # select N-brightest
            inds = np.argsort(mags)[:self.n_stars_in_image]
            x_im, y_im = x[inds], y[inds]

            if len(x_im) < self.min_matches:
                toast('Too few stars to platesolve (min: {:})'.format(
                    self.min_matches))
                return

            # get reference stars for search field (NB larger fov is worse)
            ras, decs, mags = Component.get(
                'Catalogues').get_platesolving_stars(
                    make_tile(self.ra0, self.dec0, fov=2 * fov))

            #  sort by magnitude and limit to N-brightest (should depend on area re stars extracted in central zone)
            inds = np.argsort(mags)[:80]

            #  convert to cartesian coords
            ras, decs = Eq2Cart(ras[inds], decs[inds], self.ra0, self.dec0)

            # do fastmatch with ref and im stars in degrees
            matches = fastmatch(
                ref=np.degrees(np.vstack([ras, decs]).T),
                im=dpp * np.vstack([x_im, y_im]).T,
                match_arcsec=self.match_arcsec,
                min_matches=self.min_matches,
                mag_range=self.mag_range,
                proximity=0.05,
                first_match=self.first_match,
            )

            # check if we have a result
            if matches is None:
                # self.warn('failed to match')
                toast('Failed to solve: is image flipped correctly?')
                logger.warning('no match, {:} im stars, {:} ref stars'.format(
                    len(x_im), len(ras)))
                return

            # use result to find transform from ref in cartesian to im  in pixels
            src = np.vstack([ras, decs]).T[[j for (i, j) in matches], :]
            dst = np.vstack([x_im, y_im]).T[[i for (i, j) in matches], :]

            # consider doing RANSAC or alternative transform here
            # self.cart2pix = estimate_transform('similarity', src, dst)
            self.cart2pix = estimate_transform('affine', src, dst)

            # find centre of frame in RA/Dec
            self.tile_ra0, self.tile_dec0 = self.pixels_to_ra_dec(
                self.h // 2, self.w // 2)

            # estimate FOV
            self.FOV_h = spherical_distance(*self.pixels_to_ra_dec(0, 0),
                                            *self.pixels_to_ra_dec(self.h, 0))
            self.FOV_w = spherical_distance(*self.pixels_to_ra_dec(0, 0),
                                            *self.pixels_to_ra_dec(0, self.w))

            #  and compute where North is
            xx, yy = self.ra_dec_to_pixels(
                np.array([self.ra0, self.ra0]),
                np.array([self.dec0 - 0.5, self.dec0 + 0.5]),
            )
            self.north = to360(
                90 - np.degrees(np.arctan2(yy[1] - yy[0], xx[1] - xx[0])))

            desc = '{:5.3f} x {:5.3f}, {:.0f}\u00b0, RA: {:} Dec: {:}'.format(
                self.FOV_w, self.FOV_h, self.north, str(RA(self.tile_ra0)),
                str(Dec(self.tile_dec0)))

            toast('Solved ({:} matched)'.format(len(matches)), duration=1)
            logger.info(desc)
            self.info('{:.2f}\u00b0 x {:.2f}\u00b0 | {:} | {:}'.format(
                self.FOV_w, self.FOV_h, str(RA(self.tile_ra0)),
                str(Dec(self.tile_dec0))))

        except Exception as e:
            logger.exception('error in match {:}'.format(e))
Example #27
    def sign_in_by_question(self, entry, config):
        base_response = self._request(entry, 'get', entry['url'], headers=entry['headers'])
        sign_in_state, base_content = self.check_sign_in_state(entry, base_response, entry['url'])
        if sign_in_state != SignState.NO_SIGN_IN:
            return

        question_element = get_soup(base_content).select_one('input[name="questionid"]')
        if question_element:
            question_id = question_element.get('value')

            local_answer = None

            question_file_path = os.path.dirname(__file__) + '/question.json'
            if Path(question_file_path).is_file():
                with open(question_file_path) as question_file:
                    question_json = json.loads(question_file.read())
            else:
                question_json = {}

            question_extend_file_path = os.path.dirname(__file__) + '/question_extend.json'
            if Path(question_extend_file_path).is_file():
                with open(question_extend_file_path) as question_extend_file:
                    question_extend_json = json.loads(question_extend_file.read())
                os.remove(question_extend_file_path)
            else:
                question_extend_json = {}

            self._dict_merge(question_json, question_extend_json)

            site_question = question_json.get(entry['url'])
            if site_question:
                local_answer = site_question.get(question_id)
            else:
                question_json[entry['url']] = {}

            choice_elements = get_soup(base_content).select('input[name="choice[]"]')
            choices = []
            for choice_element in choice_elements:
                choices.append(choice_element.get('value', ''))

            if choice_elements[0].get('type') == 'radio':
                choice_range = 1
            else:
                choice_range = len(choices)

            answer_list = []

            for i in range(choice_range):
                for arr in itertools.combinations(choices, i + 1):
                    if list(arr) not in answer_list:
                        answer_list.append(list(arr))
            answer_list.reverse()
            if local_answer and local_answer in choices and len(local_answer) <= choice_range:
                answer_list.insert(0, local_answer)
            times = 0
            for answer in answer_list:
                data = {'questionid': question_id, 'choice[]': answer, 'usercomment': '此刻心情:无', 'submit': '提交'}  # usercomment: "current mood: none"; submit: "Submit"
                response = self._request(entry, 'post', entry['url'], data=data)
                state, content = self.check_sign_in_state(entry, response, entry['url'])
                if state == SignState.SUCCEED:
                    entry['result'] = '{} ( {} attempts.)'.format(entry['result'], times)

                    question_json[entry['url']][question_id] = answer
                    with open(question_file_path, mode='w') as question_file:
                        json.dump(question_json, question_file)
                    logger.info('{}, correct answer: {}', entry['title'], data)
                    return
                times += 1
        entry['result'] = SignState.SIGN_IN_FAILED.value.format('No answer')
        entry.fail(entry['result'])
Example #28
    def send_request(self):
        while True:
            ts = int(time.time())
            if ts % 60 != 0:
                time.sleep(0.9)
                continue
            data_list = requests.get(
                "https://www.okex.com/api/futures/v3/instruments").json()
            data_list += requests.get(
                "https://www.okex.com/api/swap/v3/instruments").json()

            item_list = []
            for data in data_list:
                if data['is_inverse'] == "true" or data['is_inverse'] is True:
                    item = {}
                    item['symbol'] = data['instrument_id']
                    item['quote'] = data['quote_currency']
                    if data.get('alias'):
                        timeid = data.get('alias')
                        if timeid == 'this_week':
                            item['timeid'] = 'CW'
                        elif timeid == 'next_week':
                            item['timeid'] = 'NW'
                        elif timeid == 'quarter':
                            item['timeid'] = 'CQ'
                        else:
                            item['timeid'] = 'NQ'
                    else:
                        item['timeid'] = 'SWAP'
                    item_list.append(item)

            data_list = [
                item for item in item_list
                if item['symbol'].split("-")[0] == "BTC"
            ]
            print(data_list)

            for data in data_list:
                if data['symbol'].split("-")[0] == "BTC":
                    if data['timeid'] == 'SWAP':
                        url = self.swap_url.format(data['symbol'])
                    else:
                        url = self.futures_url.format(data['symbol'])

                    while True:
                        try:
                            response = requests.get(
                                url,
                                proxies={
                                    "https":
                                    "http://127.0.0.1:{}".format(
                                        random.randint(8081, 8323))
                                })
                            self.parse_response(response, data['timeid'], ts)
                            break
                        except Exception as e:
                            logger.error(e)
                            logger.error("正在重新发送请求...")

            logger.info("采集结束,一分钟后再次采集...")
            time.sleep(20)
Example #29
 async def upload_item(self, item: UploadItem):
     logger.info('Uploading item')
     await self.pre_upload(item)
     await self.upload_video(item)
     # TODO delay post_to_wall
     return await self.post_to_wall(item)
Example #30
def test_time_rotation_reopening_native(tmpdir_local, delay):
    filepath = str(tmpdir_local / "test.log")
    i = logger.add(filepath, format="{message}", delay=delay, rotation="1 s")
    logger.info("1")
    time.sleep(0.75)
    logger.info("2")
    logger.remove(i)
    i = logger.add(filepath, format="{message}", delay=delay, rotation="1 s")
    logger.info("3")

    assert len(list(tmpdir_local.iterdir())) == 1
    assert (tmpdir_local / "test.log").read_text() == "1\n2\n3\n"

    time.sleep(0.5)
    logger.info("4")

    assert len(list(tmpdir_local.iterdir())) == 2
    assert (tmpdir_local / "test.log").read_text() == "4\n"

    logger.remove(i)
    time.sleep(0.5)
    i = logger.add(filepath, format="{message}", delay=delay, rotation="1 s")
    logger.info("5")

    assert len(list(tmpdir_local.iterdir())) == 2
    assert (tmpdir_local / "test.log").read_text() == "4\n5\n"

    time.sleep(0.75)
    logger.info("6")
    logger.remove(i)

    assert len(list(tmpdir_local.iterdir())) == 3
    assert (tmpdir_local / "test.log").read_text() == "6\n"
Example #31
 async def check_update(cls):
     logger.info("开始检查RSS更新")
     all_subs = await SubContent.query.gino.all()
     if all_subs:
         await asyncio.wait([cls._check_one(sub) for sub in all_subs])
     logger.info("结束检查RSS更新")
Example #32
def db_conn():
    DB_PATH = Path(__file__).parents[3] / "db" / "results.db"
    logger.info(f"Connecting to DB: {str(DB_PATH)}")
    return sqlite3.connect(str(DB_PATH), check_same_thread=False)
Example #33
def run(config: typing.Union[dict, str]):
    """
    run with config

    :param config: config file path, or a preload dict
    :return:
    """
    class _VideoUserConfig(BaseModel):
        path: str
        pre_load: bool = True
        fps: int = None

    class _CutterUserConfig(BaseModel):
        threshold: float = None
        frame_count: int = None
        offset: int = None
        limit: int = None
        block: int = None

        # common
        compress_rate: float = None
        target_size: typing.Tuple[int, int] = None

    class _ClassifierType(Enum):
        SVM = "svm"
        KERAS = "keras"

    class _ClassifierUserConfig(BaseModel):
        boost_mode: bool = None
        classifier_type: _ClassifierType = _ClassifierType.SVM
        model: str = None

        # common
        compress_rate: float = None
        target_size: typing.Tuple[int, int] = None

    class _CalcOperatorType(Enum):
        BETWEEN = "between"
        DISPLAY = "display"

    class _CalcOperator(BaseModel):
        name: str
        calc_type: _CalcOperatorType
        args: dict = dict()

    class _CalcUserConfig(BaseModel):
        output: str = None
        ignore_error: bool = None
        operators: typing.List[_CalcOperator] = None

    class _ExtraUserConfig(BaseModel):
        save_train_set: str = None

    class UserConfig(BaseModel):
        output: str
        video: _VideoUserConfig
        cutter: _CutterUserConfig = _CutterUserConfig()
        classifier: _ClassifierUserConfig = _ClassifierUserConfig()
        calc: _CalcUserConfig = _CalcUserConfig()
        extras: _ExtraUserConfig = _ExtraUserConfig()

    if isinstance(config, str):
        # path
        config_path = pathlib.Path(config)
        assert config_path.is_file(), f"no config file found in {config_path}"

        # todo: support different types in the future
        assert config_path.as_posix().endswith(
            ".json"), "config file should be json format"
        with open(config_path, encoding=constants.CHARSET) as f:
            config = json.load(f)

    config = UserConfig(**config)
    logger.info(f"config: {config}")

    # main flow
    video = VideoObject(
        # fmt: off
        path=config.video.path,
        fps=config.video.fps,
    )
    if config.video.pre_load:
        video.load_frames()

    # cut
    cutter = VideoCutter(
        # fmt: off
        compress_rate=config.cutter.compress_rate,
        target_size=config.cutter.target_size,
    )
    res = cutter.cut(
        # fmt: off
        video=video,
        block=config.cutter.block,
    )
    stable, unstable = res.get_range(
        # fmt: off
        threshold=config.cutter.threshold,
        offset=config.cutter.offset,
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # classify
        if config.classifier.classifier_type is _ClassifierType.SVM:
            cl = SVMClassifier(
                # fmt: off
                compress_rate=config.classifier.compress_rate,
                target_size=config.classifier.target_size,
            )
        elif config.classifier.classifier_type is _ClassifierType.KERAS:
            from stagesepx.classifier.keras import KerasClassifier

            cl = KerasClassifier(
                # fmt: off
                compress_rate=config.classifier.compress_rate,
                target_size=config.classifier.target_size,
            )
        # validation has been applied by pydantic
        # so no `else`

        if config.classifier.model:
            # no need to retrain
            model_path = pathlib.Path(config.classifier.model)
            assert model_path.is_file(), f"file {model_path} not existed"
            cl.load_model(model_path)
        else:
            # train a new model
            train_set_dir = config.extras.save_train_set or temp_dir
            os.makedirs(train_set_dir, exist_ok=True)

            res.pick_and_save(
                # fmt: off
                stable,
                frame_count=config.cutter.frame_count,
                to_dir=train_set_dir,
            )
            cl.train(data_path=train_set_dir)

    # start classifying
    classify_result = cl.classify(
        # fmt: off
        video,
        stable,
        boost_mode=config.classifier.boost_mode,
    )

    # calc
    def _calc_display() -> dict:
        # jsonify
        return json.loads(classify_result.dumps())

    def _calc_between(*, from_stage: str = None, to_stage: str = None) -> dict:
        assert classify_result.contain(
            from_stage), f"no stage {from_stage} found in result"
        assert classify_result.contain(
            to_stage), f"no stage {to_stage} found in result"
        from_frame = classify_result.last(from_stage)
        to_frame = classify_result.first(to_stage)
        cost = to_frame.timestamp - from_frame.timestamp
        return {
            "from": from_frame.frame_id,
            "to": to_frame.frame_id,
            "cost": cost,
        }

    _calc_func_dict = {
        _CalcOperatorType.BETWEEN: _calc_between,
        _CalcOperatorType.DISPLAY: _calc_display,
    }
    calc_output = config.calc.output
    if calc_output:
        output_path = pathlib.Path(calc_output)
        assert not output_path.is_file(), f"file {output_path} already existed"
        result = []
        for each_calc in config.calc.operators:
            func = _calc_func_dict[each_calc.calc_type]
            try:
                func_ret = func(**each_calc.args)
            except Exception as e:
                if not config.calc.ignore_error:
                    raise
                logger.warning(e)
                func_ret = traceback.format_exc()
            calc_ret = {
                "name": each_calc.name,
                "type": each_calc.calc_type.value,
                "result": func_ret,
            }
            result.append(calc_ret)
        with open(output_path, "w", encoding=constants.CHARSET) as f:
            json.dump(result, f)

    # draw
    r = Reporter()
    r.draw(
        # fmt: off
        classify_result,
        report_path=config.output,
    )
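Per the docstring, run() accepts either a JSON config path or a preloaded dict, and the UserConfig model above only requires output plus video.path; everything else falls back to the defaults shown. A usage sketch with placeholder paths:

# minimal dict config
run({
    "output": "./report.html",
    "video": {"path": "./demo.mp4"},
})

# or the equivalent JSON file on disk
run("./config.json")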
Example #34
 async def upload_video(self, item: UploadItem):
     logger.info(f'Uploading video. Calling {item.upload_url}')
     with open(item.path, 'rb') as video:
         response = await self.post(item.upload_url, data=video)
     item.media_id = response['video_id']
Example #35
    CURRENT_SETTING = f'rate_{args.rate}_ strategy_{args.strategy}'  # "rate" + str(2 ** index / 100) + "_EXP"
    os.makedirs('log', exist_ok=True)
    os.makedirs('ckpts', exist_ok=True)
    log_path = os.path.join('log', CURRENT_SETTING + '.log')
    if os.path.isfile(log_path):
        os.remove(log_path)
    logger.add(log_path)
    # net = resnet34(num_classes=10).cuda()
    MobileNetV2.cfg = current_config
    net = MobileNetV2().cuda()
    # train_dl = imagenette2('train')
    # valid_dl = imagenette2('val')
    train_dl, valid_dl = temp_load_data()
    # !important, in this case, no global pruning rate
    for pruning_rate in [1]:
        logger.info('set pruning rate = %.2f' % (1 - args.rate))
        # set pruning rate
        # for m in net.modules():
        #     if hasattr(m, 'rate'):
        #         if m.rate == 1:
        #             m.rate = pruning_rate
        #         elif m.rate == 0.99:
        #             m.rate = 1

        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=0.1,
                                    momentum=0.9,
                                    weight_decay=0.0001)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, [30, 60, 80], 0.2)
        loss_function = torch.nn.CrossEntropyLoss()
Example #36
def ping() -> Response:
    logger.info("Got ping request!")
    return make_response("pong")
Example #37
    def record_duplicate(self, name: str, source: str, xurl: str, msg: str = ""):

        status = "duplicate"
        logger.info(f"  {name}: {status} {msg}")

        self.update_status(name, source, status, xurl, msg)
Example #38
    def __generate_index(cls):
        # Generate index from given input MARC files.
        # forward index (main or variant identity string --> ctrl number/conflict)
        index, index_variants = {}, {}
        conflicts, conflicts_variants = set(), set()
        # reverse index (ctrl number --> authorized form string)
        index_reverse = {cls.UNVERIFIED: None, cls.CONFLICT: None}
        # relationship type index (relationship name --> list of types)
        index_rel_type = {}
        all_rel_types = set(
            ("Subordinate", "Superordinate", "Preordinate", "Postordinate",
             "Associative", "Dissociative", "Equivalence"))
        # bib to hdg for field transposition etc (bib id --> list of hdg ids)
        index_bib_to_hdg = {}

        with LMLDB() as db:
            for record_type, db_query in (('bib', db.get_bibs), ('auth',
                                                                 db.get_auts)):
                logger.info(f"reading {record_type}s...")
                for _, record in tqdm(db_query()):
                    # if relationship, add to rel type index
                    if record.get_broad_category() == 'Relationships':
                        rel_types = sorted(
                            list(all_rel_types
                                 & set(record.get_all_categories())))
                        rel_name = record['155']['a'].rstrip(': ')
                        index_rel_type[rel_name] = rel_types

                    # main indices
                    ctrlno, element_type, id_string, auth_form = record.get_identity_information(
                    )
                    if element_type and id_string:
                        # Record has a valid identity, add to indices
                        # Reverse
                        index_reverse[ctrlno] = auth_form
                        # Forward
                        # Main entry:
                        if element_type not in index:
                            index[element_type] = {}
                        if id_string in index[element_type]:
                            # Multiple main entries have this same identity tuple
                            conflicts.add((element_type, id_string))
                        else:
                            index[element_type][id_string] = ctrlno
                        # Variant entries:
                        # for Organization and Event subdivisions, add variant fields with concatenated divisions as a single ^a
                        if element_type in (ORGANIZATION, EVENT):
                            for field in record.get_fields('110', '410'):
                                if 'b' in field:
                                    record.add_field(
                                        Field('410', '2 ', ('a', ' '.join(
                                            field.get_subfields('a', 'b')))))
                            for field in record.get_fields('111', '411'):
                                if 'e' in field:
                                    record.add_field(
                                        Field('411', '2 ', ('a', ' '.join(
                                            field.get_subfields('a', 'e')))))
                        for variant_element_type, variant_id_string in record.get_variant_types_and_ids(
                        ):
                            if variant_element_type not in index_variants:
                                index_variants[variant_element_type] = {}
                            if variant_id_string in index_variants[
                                    variant_element_type]:
                                # Multiple variants have this same identity tuple
                                conflicts_variants.add(
                                    (variant_element_type, variant_id_string))
                            else:
                                index_variants[variant_element_type][
                                    variant_id_string] = ctrlno

            logger.info(f"reading hdgs...")
            for hdg_ctrlno, hdg_record in tqdm(db.get_hdgs()):
                bib_ctrlno = hdg_record['004'].data
                if bib_ctrlno not in index_bib_to_hdg:
                    index_bib_to_hdg[bib_ctrlno] = []
                index_bib_to_hdg[bib_ctrlno].append(hdg_ctrlno)

        # Go back and mark conflicts:
        # Conflicts within main identities
        for element_type, conflict in conflicts:
            index[element_type][conflict] = cls.CONFLICT
        # Conflicts within variant identities
        for element_type, conflict in conflicts_variants:
            index_variants[element_type][conflict] = cls.CONFLICT
        # If a variant identity is the same as a main identity,
        # the main one wins out
        for element_type, id_map in index.items():
            if element_type in index_variants:
                main_id_strings = set(id_map.keys())
                variant_id_strings = set(index_variants[element_type].keys())
                colliding_id_strings = main_id_strings & variant_id_strings
                for id_string in colliding_id_strings:
                    index_variants[element_type].pop(id_string)

        # Finally merge main and variant together
        for element_type, id_map in index.items():
            if element_type in index_variants:
                index[element_type].update(index_variants[element_type])

        cls.index, cls.index_reverse = index, index_reverse
        cls.index_rel_type, cls.index_bib_to_hdg = index_rel_type, index_bib_to_hdg
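The forward index above follows a "first writer wins, later duplicates become a sentinel" rule: the first record to claim an identity keeps it, and any identity seen more than once is overwritten with cls.CONFLICT. A stripped-down sketch of that conflict handling, independent of the MARC specifics (CONFLICT here is a stand-in sentinel):

CONFLICT = object()  # sentinel standing in for cls.CONFLICT


def build_index(identities):
    """identities: iterable of (element_type, id_string, ctrlno) tuples."""
    index, conflicts = {}, set()
    for element_type, id_string, ctrlno in identities:
        bucket = index.setdefault(element_type, {})
        if id_string in bucket:
            # Multiple records share this identity tuple
            conflicts.add((element_type, id_string))
        else:
            bucket[id_string] = ctrlno
    # Mark every identity that appeared more than once as a conflict.
    for element_type, id_string in conflicts:
        index[element_type][id_string] = CONFLICT
    return index


index = build_index([("Person", "Smith, John", "n001"),
                     ("Person", "Smith, John", "n002"),
                     ("Work", "Hamlet", "n003")])
assert index["Person"]["Smith, John"] is CONFLICT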
예제 #39
0
def als_l1b_dem_generation_workflow(als_filepath,
                                    grid_preset,
                                    parameter,
                                    dem_cfg,
                                    gps=None,
                                    metadata=None,
                                    **connect_keyw):
    """
    Creates quickview plots of a specific ALS l1b elevation file
    :param source_dir: (str) the path of the laserscanner file
    :param als_filename: (dict) configuration including the filename and the preset for gridding/quickview process
    :param grid_preset: (str) name of the gridding preset (sea_ice_low or sea_ice_high)
    :param gps: (xarray.Dataset) gps data of the entire flight
    :param metadata: (dict) metadata dictionary
    :param connect_keyw: (dict) keywords to be passed for alsfile.connect (e.g. device_name_override)
    :return: None
    """

    # Step 1: connect to the laserscanner file
    source_dir, als_filename = os.path.split(als_filepath)
    logger.info("Open ALS binary file: %s" % als_filename)
    try:
        alsfile = AirborneLaserScannerFile(als_filepath, **connect_keyw)
    except BaseException:
        logger.error("Unexpected error -> skip file")
        print(sys.exc_info()[1])
        return

    # Get the gridding settings
    dem_cfg = AlsDEMCfg.preset(grid_preset, **dem_cfg)

    # Get a segment list based on the suggested segment lengths for the gridding preset
    segments = alsfile.get_segment_list(dem_cfg.segment_len_secs)
    n_segments = len(segments)

    flightdata = None
    if gps is not None:
        logger.info("Adding GPS flight data")
        seconds = gps.TIME.values.astype(float) * 0.001
        time = alsfile.timestamp2time(seconds)
        flightdata = FlightGPSData(time, gps.LONGITUDE.values,
                                   gps.LATITUDE.values, gps.ALTITUDE.values)

    # Only necessary if multiprocessing is used
    logger.info("Split file in %d segments" % n_segments)
    for i, (start_sec, stop_sec) in enumerate(segments):

        logger.info("Processing %s [%g:%g] (%g/%g)" %
                    (als_filename, start_sec, stop_sec, i + 1, n_segments))

        # Extract the segment
        try:
            als = alsfile.get_data(start_sec, stop_sec)
        except BaseException:
            msg = "Unhandled exception while reading %s:%g-%g -> Skip segment"
            logger.error(msg % (als_filename, start_sec, stop_sec))
            print(sys.exc_info()[1])
            continue

        # Apply atmospheric filter
        atmfilter = AtmosphericBackscatterFilter()
        atmfilter.apply(als)

        if metadata is not None:
            logger.info("Adding metadata")
            als.metadata.set_attributes(metadata["global_attrs"])
            als.metadata.set_variable_attributes(metadata["variable_attrs"])

        # Validate segment
        # -> Do not try to grid a segment that has no valid elevations
        if not als.has_valid_data:
            logger.error("... Invalid data in %s:%g-%g -> skipping segment" %
                         (als_filename, start_sec, stop_sec))
            continue

        if flightdata is not None:
            als.set_flightdata(flightdata)

        # Grid the data and create a netCDF
        # NOTE: This can be run in parallel for different segments, therefore option to use
        #       multiprocessing
        export_dir = source_dir
        gridding_workflow(als, dem_cfg, export_dir)
예제 #40
0
파일: film.py 프로젝트: suyu0925/spiders
    def parse(self, response):
        logger.info(f'parse film page {self.page}')
        if False:
            filename = f'film_{self.actor[0]}_{self.page}.html'
            with open(filename, 'wb') as f:
                f.write(response.body)
            logger.info(f'Saved file {filename}')

        hrefs = response.selector.xpath('//div[@id="videos"]//div[@class="grid-item column"]/a/@href').getall()
        titles = response.selector.xpath('//div[@id="videos"]//div[@class="grid-item column"]/a/@title').getall()
        covers = response.selector.xpath(
            '//div[@id="videos"]//div[@class="grid-item column"]/a/div/img/@data-src').getall()
        uids = response.selector.xpath(
            '//div[@id="videos"]//div[@class="grid-item column"]/a/div[@class="uid"]/text()').getall()
        dates = response.selector.xpath(
            '//div[@id="videos"]//div[@class="grid-item column"]/a/div[@class="meta"]/text()').getall()
        dates = [x.strip() for x in dates]
        tags_selector = response.selector.xpath(
            '//div[@id="videos"]//div[@class="grid-item column"]/a/div[@class="tags has-addons"]')
        tags = [','.join(x.xpath('span/text()').getall()) for x in tags_selector]
        rows = [(self.actor[0], hrefs[i], titles[i], uids[i], covers[i], tags[i], dates[i]) for i in range(len(hrefs))]

        # check if reach the bound
        try:
            c = self.conn.cursor()
            c.execute("""CREATE TABLE IF NOT EXISTS films(
                name text,
                href text,
                title text,
                uid text,
                cover text,
                tag text,
                date text,
                footage text,
                maker text,
                deliver text,
                rate text,
                category text,
                actor text,
                PRIMARY KEY(uid)
            );""")
            sql = f"""SELECT * FROM films 
                WHERE name = '{self.actor[0]}' AND uid = '{uids[-1]}'
            ;"""
            c.execute(sql)
            film = c.fetchone()
            if film is not None:
                logger.info(f'film({self.actor[0]}, {uids[-1]}) has already been crawled: {film}')
                return
        except Exception as e:
            logger.error(f'crawl film raise {e}')
            return

        # save to sqlite3
        try:
            c = self.conn.cursor()
            c.executemany("""
                INSERT OR REPLACE INTO films(name, href, title, uid, cover, tag, date) 
                VALUES (?,?,?,?,?,?,?);
            """, rows)
            logger.info(f'inserted {c.rowcount} rows')
            self.conn.commit()
        except Exception as e:
            logger.error(f'sql raise error {e}')
            self.conn.rollback()

        # check if there is next page
        has_next_page = response.selector.xpath('//nav[@class="pagination"]/a[@rel="next"]/@href').get()
        if has_next_page is None:
            return

        # crawl next page
        self.page = self.page + 1
        yield from self.crawl()
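Both the duplicate check and the bulk insert go through sqlite3 placeholders rather than formatting values into the SQL string, which keeps names containing quotes from breaking the statement. A standalone sketch of the same pattern against a hypothetical in-memory table:

import sqlite3

conn = sqlite3.connect(":memory:")
c = conn.cursor()
c.execute("CREATE TABLE IF NOT EXISTS films(name TEXT, uid TEXT, title TEXT, PRIMARY KEY(uid));")

rows = [("O'Hara", "ABC-001", "first title"), ("O'Hara", "ABC-002", "second title")]
c.executemany("INSERT OR REPLACE INTO films(name, uid, title) VALUES (?, ?, ?);", rows)
conn.commit()

# Values are bound separately from the SQL text, so the apostrophe in the name is harmless.
c.execute("SELECT * FROM films WHERE name = ? AND uid = ?;", ("O'Hara", "ABC-002"))
print(c.fetchone())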
예제 #41
0
파일: Surveyor.py 프로젝트: adlpr/pylmldb
    def run_report(self, outf_name: str = "surveyor_out.csv") -> None:
        # check write permissions for output file before running the whole thing
        with open(outf_name, 'w', encoding='utf-8-sig') as outf:
            pass
        #
        with LMLDB() as db:
            primary_id_to_secondary_records = {}
            secondary_id_to_secondary_record = {}
            if self.use_crossreferencing and self.primary_record_type != db.AUT:
                logger.info("building crossreferences")
                if self.primary_record_type == db.HDG:
                    secondary_record_type, get_secondary_record_ids = db.BIB, db.get_bibs_for_hdg
                else:
                    secondary_record_type, get_secondary_record_ids = db.HDG, db.get_hdgs_for_bib
                logger.info(
                    "mapping secondary record ids to secondary records")
                for ctrlno, record in db.get_records(secondary_record_type):
                    secondary_id_to_secondary_record[str(ctrlno)] = record
                logger.info(
                    "mapping primary record ids to lists of secondary records")
                for ctrlno, record in db.get_records(self.primary_record_type):
                    primary_id = str(ctrlno)
                    primary_id_to_secondary_records[primary_id] = [
                        secondary_id_to_secondary_record.get(secondary_id)
                        for secondary_id in (
                            get_secondary_record_ids(primary_id) or ())
                    ]
                del secondary_id_to_secondary_record
                if self.use_items:
                    # load item vw table
                    # @@@@@@@@@@@@@@@@@@@@@@@@@
                    logger.info("pull item record info")
                    with open("surveyordata/ITEM_VW.csv",
                              encoding='windows-1251') as inf:
                        reader = csv.reader(inf, dialect='excel')
                        header = list(next(reader))
                        item_vw = [dict(zip(header, line)) for line in reader]

            # pull records and filter
            logger.info("pull primary records and filter")
            filtered_record_sets = []
            for ctrlno, primary_record in db.get_records(
                    self.primary_record_type):
                primary_id = str(ctrlno)
                secondary_records = primary_id_to_secondary_records.get(
                    primary_id, [])
                tertiary_records = []
                if all(
                        f(primary_id, primary_record, secondary_records,
                          tertiary_records) for f in self.filters):
                    filtered_record_sets.append(
                        (primary_id, primary_record, secondary_records,
                         tertiary_records))

        # build columns
        # header
        results = [tuple(self.columns.keys())]
        for record_set in filtered_record_sets:
            results.append(
                [col_func(*record_set) for col_func in self.columns.values()])

        # output
        logger.info("outputting")
        with open(outf_name, 'w', encoding='utf-8-sig') as outf:
            writer = csv.writer(outf, dialect=csv.excel, quoting=csv.QUOTE_ALL)
            for result in results:
                writer.writerow(result)
예제 #42
0
            dta = {
                # TODO Make this dynamic and read the data from the response once I figure out how to decode the stuff. .decode() does not work
                "RelayState": "https://mycourses.rit.edu/d2l/shibbolethSSO/login.d2l",
                "SAMLResponse": re.search('(<input type="hidden" name="SAMLResponse" value=").*("/>)', rs.text).group(
                    0).replace('<input type="hidden" name="SAMLResponse" value="', '').replace('"/>', '')
            }
        # Soooo it seems that shibboleth might not return the right code on password error
        except Exception:
            progress.clear()
            logger.warning("Shibboleth rejected your username and/or password.")
            exit()

        rq = session.post(D2L_BASEURL + "/Shibboleth.sso/SAML2/POST", data=dta, allow_redirects=True)
        session.get(D2L_BASEURL + "/d2l/lp/auth/login/ProcessLoginActions.d2l")

    logger.info("Successfully logged into MyCourses")

    with halo.Halo(text="Discovering Courses", spinner="dots") as progress:

        # We need to get the XSRF.Token to move forward
        bph = session.get("{}/d2l/le/manageCourses/search/6605".format(D2L_BASEURL))
        if bph.status_code != 200:
            logger.error("Course Query failed. Invalid response code! Expected 200, got {}", bph.status_code)
            exit(1)

        token = get_xfrs_token(bph.text)
        now = datetime.datetime.now()
        query_data = {
            "gridPartialInfo$_type": "D2L.LP.Web.UI.Desktop.Controls.GridPartialArgs",
            "gridPartialInfo$SortingInfo$SortField": "OrgUnitName",
            "gridPartialInfo$SortingInfo$SortDirection": "0",
예제 #43
0
파일: app.py 프로젝트: labroid/PhotoManager
 def update_md5s(self):
     self.status("Updating MD5s...")
     for photo in Queue.objects(md5sum=None):
         photo.modify(md5sum=file_md5sum(photo.src_path))
     logger.info("MD5 Done")