def main():
    try:
        # Poll the camera API until all 87 images are listed or retries run out.
        retry_count = 0
        image_count = 0
        while retry_count < 4 and image_count < 87:
            r = requests.get(url, headers=headers).json()['value']
            image_count = len(r)
            logging.info('%s items found.' % image_count)
            if image_count < 87:
                logging.warning('Image count < 87')
                retry_count += 1
                sleep(5)
    except JSONDecodeError:
        logging.exception("JSONDecodeError")
    else:
        # Derive a timestamped directory name from the first image's URL path.
        path_split = r[0]['ImageLink'].split('?')[0].split('/')
        request_datetime = ('%s%s' % (path_split[3], path_split[4])).replace('-', '')
        dest_dir = os.path.join(images_dir, request_datetime)
        if os.path.isdir(dest_dir):
            logging.info('Deleting %s' % dest_dir)
            shutil.rmtree(dest_dir)
        os.makedirs(dest_dir)
        meta_df = []
        logging.info('Downloading images to %s' % dest_dir)
        for item in r:
            dest_path = os.path.join(
                dest_dir, item['ImageLink'].split('?')[0].split('/')[-1])
            download_image(item['ImageLink'], dest_path)
            path_split = item['ImageLink'].split('?')[0].split('/')
            meta_data = [
                item['CameraID'], item['Latitude'], item['Longitude'],
                path_split[3],
                path_split[-1].split('_')[1],
                path_split[-1].split('.')[0],
                Image.open(dest_path).size
            ]
            meta_df.append(meta_data)
        metadata_dir = os.path.join(data_dir, 'traffic-images-metadata')
        if not os.path.isdir(metadata_dir):
            logging.info('Creating %s' % metadata_dir)
            os.makedirs(metadata_dir)
        metadata_path = os.path.join(metadata_dir,
                                     '%s_images.csv' % request_datetime)
        pd.DataFrame(meta_df, columns=[
            'CameraID', 'Latitude', 'Longitude', 'Date', 'Time', 'Filename',
            'Dimensions'
        ]).to_csv(metadata_path, index=False, header=False)
        logging.info('Saved metadata to %s' % metadata_path)

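# `download_image` is called above but not defined in this file; a minimal
# sketch, assuming it simply streams the URL to disk with requests:
def download_image(image_link, dest_path):
    resp = requests.get(image_link, stream=True, timeout=30)
    resp.raise_for_status()
    with open(dest_path, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)
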
def run():
    conn = open_connection()
    div_ids = dt.insert_divisions_by_tags(dt.ENGLAND, conn=conn)
    data_tags = dt.get_tags_for_ids(div_ids, conn=conn)
    for year_tag in years_tags:
        logging.info(f"Reading football data for year tag {year_tag}")
        for division_id, division_tag in zip(div_ids, data_tags):
            csv_url = f"{url}{year_tag}/{division_tag}.csv"
            try:
                df = read_csv(csv_url)
            except UnicodeDecodeError:
                # Some seasons are served with a broken encoding; re-download
                # the raw text and parse it from a temporary file instead.
                logging.warning(f"Failed to read csv for year tag {year_tag}")
                page = requests.get(csv_url)
                csv_file = page.text.encode().decode("utf-8")
                with open("tmp.csv", "w") as f:
                    f.write(csv_file)
                df = read_csv("tmp.csv")
            df = df.dropna(thresh=10)
            start_date = df["Date"].replace(["NaN", 'NaT'], np.nan).dropna().min()
            end_date = df["Date"].replace(["NaN", 'NaT'], np.nan).dropna().max()
            st_id = st.insert(division_id, year_tag, start_date, end_date,
                              conn=conn)
            match.insert_matches(df, st_id, conn=conn)
    conn.commit()
    close_connection(conn)

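# The loader above relies on module-level configuration that is not shown
# here; hypothetical values in the football-data.co.uk style (assumed):
url = "https://www.football-data.co.uk/mmz4281/"  # base URL for season CSVs
years_tags = ["1718", "1819", "1920"]             # season tags to download
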
def createIndex(esServer="localhost:9200",
                indexName="test",
                indexType="doctype",
                mapping=None,
                setting=None,
                clear=False):
    conn = ES(esServer)
    if clear:
        deleteIndex(esServer=esServer, indexName=indexName)
    logging.info('Create index %s ...', indexName)
    try:
        if setting:
            conn.indices.create_index(indexName, setting)
        else:
            conn.indices.create_index(indexName, None)
    except exceptions.IndexAlreadyExistsException:
        logging.warning('Index is already created: %s ...', indexName)
    if mapping is not None:
        logging.info('Put mapping...')
        conn.indices.put_mapping(indexType, mapping, [indexName])
    return True

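# A usage sketch for the helper above; the host, index name, and mapping are
# made up, and the mapping shape follows pyes conventions (assumed):
doc_mapping = {
    'properties': {
        'title': {'type': 'string', 'store': 'yes'},
        'created': {'type': 'date'},
    }
}
createIndex(esServer="localhost:9200", indexName="articles",
            indexType="article", mapping=doc_mapping, clear=True)
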
def has_goalcom_url(fifa_id, conn):
    pit_data = pit.get_by_fifa_id(fifa_id, conn=conn)
    if pit_data.shape[0] == 0:
        logging.warning(f"No player for id {fifa_id}")
        return False
    if pit_data["goalcom_name"].item():
        return True
    return False

def get_feature_vector_for_match(match):
    match_df = mt.get_match(match["id"], conn=conn)
    feature_vector = {}
    if match_df.shape[0]:
        match_data = match_df.iloc[0, :].to_dict()
        feature_vector = {
            **match_data,
            **data_provider.create_match_feature_vector(match_data, conn=conn)
        }
    else:
        logging.warning(f"No match data for match id {match['id']}")
    return feature_vector

def is_node_a_state(node: dict) -> bool:
    """
    detects if node is a state (using @configuration key)
    :param node: dict with node
    :return: true if state otherwise false
    """
    try:
        if node['y:GenericNode']['@configuration'] == "com.yworks.entityRelationship.big_entity":
            return True
    except KeyError:
        logging.warning("%s node is incorrect" % node['id'])
        return False
    return False

def attach_ids_to_players(players):
    valid_players = []
    invalid_players = []
    for player in players:
        ret = pit.get_id_by_goalcom_url(player["url_id"], conn=conn)
        if ret:
            player["fifa_id"] = ret[0]
            valid_players.append(player)
        else:
            logging.warning(
                f"No player for goalcom player url {player['url_id']}")
            invalid_players.append(player)
    return valid_players, invalid_players

def main(conn):
    fifa_ids = pd.read_csv("fifa_ids.csv")["fifa_id"].values
    logging.info(f"Inserting {len(fifa_ids)} players")
    for fifa_id in fifa_ids:
        player_obj = {}
        player_df = get_latest_by_fifa_id(fifa_id, conn=conn)
        if player_df.shape[0] == 0:
            logging.warning(f"No player found for fifa id {fifa_id}")
            continue  # skip missing players instead of crashing on .item() below
        player_obj["fifa_name"] = player_df["name"].item()
        player_obj["fifa_id"] = fifa_id
        insert(conn, **player_obj)
    conn.commit()

def is_node_a_choice(node: dict) -> bool:
    """
    detects if node is a choice (using @configuration key)
    :param node: dict with node
    :return: true if choice otherwise false
    """
    try:
        if node['y:GenericNode']['@configuration'] == "com.yworks.bpmn.Gateway.withShadow":
            return True
    except KeyError:
        logging.warning("%s node is incorrect" % node['id'])
        return False
    return False

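# A sketch of the yEd GraphML node dicts these predicates expect; key names
# are taken from the parsers above, the id value is made up:
state_node = {
    'id': 'n0',
    'y:GenericNode': {
        '@configuration': 'com.yworks.entityRelationship.big_entity',
    },
}
assert is_node_a_state(state_node) and not is_node_a_choice(state_node)
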
def check_if_running(dir_name):
    to_process = False
    currently_detecting = None
    if os.path.isfile(log_path):
        try:
            with open(log_path, 'r') as f:
                currently_detecting = json.load(f)
        except json.decoder.JSONDecodeError:
            logging.warning('Error loading currently detecting json')
        else:
            logging.info('Loaded current runs')
    if currently_detecting is not None:
        if dir_name in currently_detecting:
            start = datetime.strptime(currently_detecting[dir_name],
                                      '%Y-%m-%d %H:%M:%S %z')
            if (datetime.now(tz=timezone('Singapore')) - start).total_seconds() / 60 > 60:
                logging.info(
                    'Last run for %s started over 60 minutes ago, starting parallel run'
                    % dir_name)
                to_process = True
                currently_detecting[dir_name] = datetime.now(
                    tz=timezone('Singapore')).strftime('%Y-%m-%d %H:%M:%S %z')
            else:
                logging.info(
                    'Last run for %s within 60 mins still running, skipping for now'
                    % dir_name)
        else:
            logging.info('No current runs for %s detected' % dir_name)
            to_process = True
            currently_detecting[dir_name] = datetime.now(
                tz=timezone('Singapore')).strftime('%Y-%m-%d %H:%M:%S %z')
    else:
        logging.info('No current run logs found, generating from scratch')
        to_process = True
        currently_detecting = {
            dir_name: datetime.now(
                tz=timezone('Singapore')).strftime('%Y-%m-%d %H:%M:%S %z')
        }
    if to_process:
        with open(log_path, 'w') as f:
            json.dump(currently_detecting, f)
    return to_process

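# Usage sketch: `log_path` above is assumed to point at a JSON lock file
# mapping run directories to start times, e.g.
# {"20210101_1200": "2021-01-01 12:00:00 +0800"}; a caller might guard a
# run like this (`detect_vehicles` is hypothetical):
if check_if_running('20210101_1200'):
    detect_vehicles('20210101_1200')
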
def get_state_actions(data: dict) -> str:
    """
    get label with actions from node data
    :param data: node with data
    :return: str with actions
    """
    try:
        data = data['y:GenericNode']
    except KeyError:
        logging.warning("Cannot retrieve state actions %s" % data['id'])
    data = flatten([data], 'y:NodeLabel')
    for label in data:
        if "#text" in label.keys() and '@configuration' in label.keys():
            if label['@configuration'] == 'com.yworks.entityRelationship.label.attributes':
                return label['#text']
    return ""

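# `flatten` is used by several parsers here but defined elsewhere; a minimal
# sketch under the assumption that it collects the values stored under `key`
# in each node, normalising a single dict to a one-element list:
def flatten(nodes: [dict], key: str) -> list:
    result = []
    for node in nodes:
        value = node.get(key)
        if isinstance(value, list):
            result.extend(value)
        elif value is not None:
            result.append(value)
    return result
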
def get_state_label(data: dict) -> str:
    """
    gets state label from node data
    :param data: dict with data
    :return: string with label
    """
    node_id = data['id']
    try:
        data = data['y:GenericNode']['y:NodeLabel']
    except KeyError:
        logging.warning("Cannot retrieve state name %s" % node_id)
    if not isinstance(data, list):
        data = [data]
    for label in data:
        if "#text" in label.keys() and '@configuration' in label.keys():
            if label['@configuration'] == 'com.yworks.entityRelationship.label.name':
                return label['#text']
    logging.warning("Cannot retrieve state name %s" % node_id)
    return ""

def get_group_label(data: dict) -> str:
    """
    gets group node label from node data
    :param data: dict with data
    :return: string with label
    """
    node_id = data['id']
    try:
        data = data['y:ProxyAutoBoundsNode']['y:Realizers']['y:GroupNode']
    except KeyError:
        logging.warning("Cannot retrieve group name %s" % node_id)
        return ""
    data = flatten([data], 'y:NodeLabel')
    for label in data:
        if "#text" in label.keys() and '@modelName' in label.keys():
            if label['@modelName'] == 'internal':
                return label['#text']
    logging.warning("Cannot retrieve group name %s" % node_id)
    return ""

def get_group_actions(data: dict) -> str:
    """
    get label with actions from group node data
    :param data: node with data
    :return: str with actions
    """
    try:
        data = data['y:ProxyAutoBoundsNode']['y:Realizers']['y:GroupNode']
    except KeyError:
        logging.warning("Cannot retrieve group actions %s" % data['id'])
        return ""
    data = flatten([data], 'y:NodeLabel')
    for label in data:
        if "#text" in label.keys() and '@modelName' in label.keys():
            if label['@modelName'] == 'custom':
                return label['#text']
            # use .get() so labels without an @alignment key cannot raise
            if label.get('@alignment') == 'left':
                return label['#text']
    return ""

def clear_working_files(dir_name):
    images_path = os.path.join(images_dir, dir_name)
    shutil.rmtree(images_path)
    logging.info('Deleted %s' % images_path)
    metadata_path = os.path.join(metadata_dir, '%s_images.csv' % dir_name)
    os.remove(metadata_path)
    logging.info('Deleted %s' % metadata_path)
    try:
        with open(log_path, 'r') as f:
            currently_detecting = json.load(f)
    except json.decoder.JSONDecodeError:
        logging.warning('Error loading currently detecting json')
    else:
        if dir_name in currently_detecting:
            currently_detecting.pop(dir_name)
            with open(log_path, 'w') as f:
                json.dump(currently_detecting, f)

def get_team_features_for_matches(match_id, date, **kwargs):
    lineup = lt.get_by_match_id(match_id, **kwargs)
    if not lineup.shape[0]:
        return {}, {}
    home_players = lineup.loc[:, lineup.columns.str.startswith('hp')].values[0]
    home_subst = lineup.loc[:, lineup.columns.str.startswith('hs')].values[0]
    home_fifa_ids = [player for player
                     in np.concatenate((home_players, home_subst)) if player]
    home_features = calculate_player_features_for_team(home_fifa_ids, date,
                                                       **kwargs)
    if home_features["gk_handling"] < 60:
        logging.warning(f"No goalkeeper in home team's lineup!! Match id: {match_id}")
    home_features = {f'home_{k}': v for k, v in home_features.items()}
    away_players = lineup.loc[:, lineup.columns.str.startswith('ap')].values[0]
    away_subst = lineup.loc[:, lineup.columns.str.startswith('as')].values[0]
    away_fifa_ids = [player for player
                     in np.concatenate((away_players, away_subst)) if player]
    away_features = calculate_player_features_for_team(away_fifa_ids, date,
                                                       **kwargs)
    if away_features["gk_handling"] < 60:
        logging.warning(f"No goalkeeper in away team's lineup!! Match id: {match_id}")
    away_features = {f'away_{k}': v for k, v in away_features.items()}
    return home_features, away_features

def map_lineup_with_player_data(lineup):
    home_team = lineup["home_team"]
    away_team = lineup["away_team"]
    date = lineup["date"]
    home_team_players, home_invalid_players = attach_ids_to_players(
        lineup["home_team_players"] + lineup["home_team_substitutes"])
    for home_invalid_player in home_invalid_players:
        add_invalid_players_to_missing(home_invalid_player, date, home_team)
    away_team_players, away_invalid_players = attach_ids_to_players(
        lineup["away_team_players"] + lineup["away_team_substitutes"])
    for away_invalid_player in away_invalid_players:
        add_invalid_players_to_missing(away_invalid_player, date, away_team)
    match_id_tuple = mt.get_id_for_game(home_team, away_team, date, conn=conn)
    if not match_id_tuple:
        logging.warning(
            f'No match for {lineup["home_team"]} vs. {lineup["away_team"]} {date}'
        )
        # Default to empty strings so a missing key cannot raise a TypeError
        # when the link and id are concatenated.
        missing_match_urls.append(
            lineup.get("match_link", "") + lineup.get("match_id", ""))
    else:
        store_lineups(match_id_tuple[0], home_team_players, away_team_players)

def replace_image(job_id, file_name, html_string, bucket_name,
                  bucket_folder='content/'):
    # Collect the unique image sources referenced in the html.
    images = set(re.findall("src='([^']+)'", html_string))
    logging.info("[IMG] Start analyzing html for job %s in file %s", job_id,
                 file_name)
    # Upload each image to our bucket and replace its src in the html.
    for image in images:
        image_src = image.strip()
        # Ignore images that were not uploaded to the hackpad S3 bucket.
        if not image_src.startswith(
                'https://hackpad-attachments.s3.amazonaws.com/'):
            continue
        logging.info("[IMG] Processing image %s" % image_src)
        # Guess the image mime type, falling back to JPEG.
        mime_type_info = mimetypes.guess_type(image_src)
        mime_type = mime_type_info[0] if mime_type_info[0] else 'image/jpeg'
        # Construct Expires and Cache-Control headers.
        days = 100
        cache_control = 'max-age=%d' % (60 * 60 * 24 * days)
        expires = datetime.utcnow() + timedelta(days=days)
        expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
        try:
            logging.info("[IMG] First try for image %s", image_src)
            # Derive the file name and a percent-encoded URL, then fetch it.
            image_name = image_src.split('/')[-1]
            image_src_parsed = urllib.parse.urlparse(image_src)
            image_name_encoded = urllib.parse.quote(image_src_parsed.path)
            file = io.BytesIO(
                urllib.request.urlopen(
                    urllib.parse.urljoin(image_src, image_name_encoded)).read())
            img = Image.open(file, mode='r')
        except urllib.error.HTTPError as error:
            logging.warning(
                "[IMG] First try block resulted in urllib.error.HTTPError: %s"
                % error)
            try:
                logging.info("[IMG] retry for image %s", image_src)
                file = io.BytesIO(urllib.request.urlopen(image_src).read())
                img = Image.open(file, mode='r')
            except urllib.error.HTTPError as error:
                logging.error("[IMG] %s", error.read())
                continue
        except UnicodeEncodeError:
            logging.error("[IMG] UnicodeEncodeError for image %s", image_src)
            continue
        # Work out the image extension (Pillow expects 'JPEG', not 'JPG').
        image_parts = image_src_parsed.path.split('.')
        image_extension = ('JPEG' if image_parts[-1].upper() == 'JPG'
                           else image_parts[-1])
        # Hack for weird image URLs without a usable extension.
        if len(image_extension) > 4:
            image_extension = 'png'
        # Re-encode the image into an in-memory byte buffer.
        imgByteArr = io.BytesIO()
        img.save(imgByteArr, format=image_extension.upper())
        imgByteArr = imgByteArr.getvalue()
        # Upload the image to our bucket, first checking whether it exists.
        try:
            s3.Object(bucket_name, bucket_folder + image_name).load()
            exists = True
        except botocore.exceptions.ClientError as e:
            # 404 means the object is missing; treat other errors as existing.
            exists = e.response['Error']['Code'] != "404"
        if exists:
            logging.info("[IMG] Skipping upload: %s already exists" % image_src)
        else:
            logging.info("[IMG] Uploading %s" % image_src)
            s3.Bucket(bucket_name).put_object(Key=bucket_folder + image_name,
                                              Body=imgByteArr,
                                              ACL='public-read',
                                              ContentType=mime_type,
                                              CacheControl=cache_control,
                                              Expires=expires)
        new_src = ('https://s3-eu-west-1.amazonaws.com/' + bucket_name + '/' +
                   bucket_folder + image_name)
        logging.info("[IMG] Replace %s with %s" % (image_src, new_src))
        # Replace the src of the image with the new uploaded location.
        html_string = html_string.replace(image_src, new_src)
    logging.info("[IMG] Finished analyzing html for job %s in file %s",
                 job_id, file_name)
    return html_string

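# A usage sketch; the job id, file name, html fragment and bucket name are
# made up, and `s3` is assumed to be a module-level boto3 resource,
# e.g. s3 = boto3.resource('s3'):
html = "<img src='https://hackpad-attachments.s3.amazonaws.com/pad.1.jpg'>"
rewritten = replace_image(42, 'pad.html', html, 'my-archive-bucket')
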
def create_actions(raw_triggers: str, source: str,
                   player_signal: [str]) -> ([Trigger], [str]):
    """
    parses raw label text with events and their actions to get a list of
    Triggers ("exit" and "entry" events ignored)
    we use a regexp to split the raw data string: the regexp is some non-space
    symbols, then some space symbols, then a "/" symbol
    Example:
    >>> create_actions("entry/
    ... BUTTON2_PRESSED/ flash(get_color(rgb_table)); play_sound(get_random_sound(BLASTER));
    ... BUTTON2_PRESSED_FOR_THREE_SECOND/ play_sound(get_random_sound(FORCE);
    ... BOTH_BUTTONS_PRESSED/ change_color(get_color(rgb_table)); play_sound(get_sound(BOOT), 5);")
    [Trigger(name="BUTTON2_PRESSED", action="flash(get_color(rgb_table)); play_sound(get_random_sound(BLASTER));", source=5),
     Trigger(name="BUTTON2_PRESSED_FOR_THREE_SECOND", action="play_sound(get_random_sound(FORCE);", source=5),
     Trigger(name="BOTH_BUTTONS_PRESSED", action="change_color(get_color(rgb_table)); play_sound(get_sound(BOOT));", source=5)]
    :param raw_triggers: string with events and reactions
    :param source: id of source node
    :param player_signal: list of all signals
    :return: list of Triggers, list of signals
    """
    trigger_regexp = r"\S+\s*/" + '\n'
    trigger_list = re.findall(trigger_regexp, raw_triggers)
    trigger_data = re.split(trigger_regexp, raw_triggers)
    triggers = dict(list(zip(trigger_list, trigger_data[1:])))
    actions = []
    for (trigger_id, (trigger, action)) in enumerate(triggers.items(), start=1):
        guard = ""
        trigger_name = trigger[:-2].strip()
        if '[' in trigger_name:
            guard_regexp = r"\[.*\]"
            res = re.search(guard_regexp, trigger_name)
            guard = res.group(0)[1:-1]
            trigger_name = re.split(guard_regexp, trigger_name)[0].strip()
            if guard != 'else':
                logging.warning("Internal trigger %s[%s] can't contain guard"
                                % (trigger_name, guard))
        if (trigger_name not in player_signal and trigger_name
                and trigger_name != "entry" and trigger_name != 'exit'):
            player_signal.append(trigger_name)
        actions.append(
            Trigger(name=trigger_name,
                    action=action.strip(),
                    source=source,
                    type="internal",
                    guard=guard,
                    target="",
                    id=trigger_id,
                    x=0,
                    y=internal_trigger_height * trigger_id,
                    dx=len(trigger_name) + internal_trigger_delta,
                    dy=0,
                    points=[],
                    action_x=0,
                    action_y=5 * trigger_id - 2,
                    action_width=len(trigger_name) + action_delta))
    return actions, player_signal

# https://github.com/jeremyephron/simple-gmail/issues/6
compute = googleapiclient.discovery.build('compute', 'v1',
                                          credentials=credentials,
                                          cache_discovery=False)
with open(os.path.join(proj_dir, 'config/instances.json'), 'r') as f:
    config = json.load(f)
for instance in config:
    project = config[instance]['project']
    zone = config[instance]['zone']
    status = get_instance(compute, project, zone, instance)['status']
    if status != 'RUNNING':
        logging.warning('%s / %s / %s STATUS: %s' %
                        (project, instance, zone, status))
    else:
        logging.debug('%s / %s / %s STATUS: %s' %
                      (project, instance, zone, status))
    # https://cloud.google.com/compute/docs/instances/instance-life-cycle
    # https://cloud.google.com/compute/docs/instances/preemptible
    if status == 'TERMINATED':
        logging.warning('Restarting instance')
        response = start_instance(compute, project, zone, instance)
        logging.warning(response)
    if status == 'RUNNING' and terminate:
        logging.warning('Terminating instance')
        response = stop_instance(compute, project, zone, instance)
        logging.warning(response)

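# `get_instance`, `start_instance` and `stop_instance` are referenced above
# but not defined here; minimal sketches using the Compute Engine v1 API:
def get_instance(compute, project, zone, instance):
    return compute.instances().get(project=project, zone=zone,
                                   instance=instance).execute()

def start_instance(compute, project, zone, instance):
    return compute.instances().start(project=project, zone=zone,
                                     instance=instance).execute()

def stop_instance(compute, project, zone, instance):
    return compute.instances().stop(project=project, zone=zone,
                                    instance=instance).execute()
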
def update_states_with_edges(states: [State], flat_edges: [dict],
                             start_state: State, player_signal: [str],
                             min_x: int, min_y: int):
    """
    function parses events on edges and adds them as external triggers to the
    corresponding state (excluding the start edge) and recognizes and adds
    special labels to a choice edge
    :param states: list of states
    :param flat_edges: list with edges
    :param start_state: id of the start state, used to exclude the start edge
    :param player_signal: list of already created signals
    :param min_x: minimal x coordinate, used to normalize edge coordinates
    :param min_y: minimal y coordinate, used to normalize edge coordinates
    :return: updated list of signals
    """
    for edge in flat_edges:
        old_source = edge['source']
        if old_source != start_state:
            old_target = edge['target']
            source_state = get_state_by_id(states, old_source, "old")
            target_state = get_state_by_id(states, old_target, "old")
            # guard must be initialized on both branches; it is used below
            # when the Trigger is constructed
            guard = ""
            if (is_edge_correct(edge, "y:GenericEdge")
                    and "#text" in edge['y:GenericEdge']['y:EdgeLabel'].keys()):
                action = edge['y:GenericEdge']['y:EdgeLabel']["#text"].split('/')
                trigger_name = action[0].strip()
                if '[' in trigger_name and ']' in trigger_name:
                    guard_regexp = r"\[.*\]"
                    res = re.search(guard_regexp, trigger_name)
                    guard = res.group(0)[1:-1]
                    trigger_name = re.split(guard_regexp,
                                            trigger_name)[0].strip()
                    if guard == 'else':
                        logging.warning(
                            "External trigger %s[%s] can't contain 'else'" %
                            (trigger_name, guard))
                trigger_action = action[1].strip() if len(action) > 1 else ""
            else:
                trigger_name = ""
                trigger_action = ""
            x, y, dx, dy, points = get_edge_coordinates(edge)
            new_points = []
            for point in points:
                new_points.append(((point[0] - min_x) // divider,
                                   (point[1] - min_y) // divider))
            action_x, action_y, action_width = get_edge_label_coordinates(edge)
            trig_type = "external"
            if source_state.type == "choice":
                trig_type = "choice_result"
            if target_state.type == "choice":
                trig_type = "choice_start"
            trigger = Trigger(name=trigger_name,
                              type=trig_type,
                              guard=guard,
                              source=old_source,
                              target=old_target,
                              action=trigger_action,
                              id=0,
                              x=x // divider,
                              y=y // divider,
                              dx=dx // divider,
                              dy=dy // divider,
                              points=new_points,
                              action_x=action_x // divider,
                              action_y=action_y // divider,
                              action_width=action_width // divider + 2)
            source_state.trigs.append(trigger)
            if trigger_name and trigger_name not in player_signal:
                player_signal.append(trigger_name)
    update_state_ids(states)
    return player_signal

def warning(session, e):
    logging.warning("Database connection has a problem. Retrying...: %s" % e)
    session.rollback()

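# A sketch of how a callback like `warning` might be wired into a retry
# loop; `run_query` and the attempt count are hypothetical, and `session`
# is assumed to be a SQLAlchemy session:
def with_retries(session, run_query, attempts=3):
    for _ in range(attempts):
        try:
            return run_query(session)
        except Exception as e:  # e.g. sqlalchemy.exc.OperationalError
            warning(session, e)
    raise RuntimeError("query failed after %d attempts" % attempts)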