def request(self, keyword_url):
    if not self.proxies:
        logging.error('No proxy is available')
        return None
    referer = self.referer
    proxy = random.choice(self.proxies)
    user_agent = random.choice(self.user_agents)
    proxies = {self.protocol: '{}://{}'.format(self.protocol, proxy)}
    headers = {'User-Agent': user_agent, 'referer': referer}
    keyword = keyword_url[0]
    url = keyword_url[1]
    timeout = 10
    try:
        req = requests.get(url, proxies=proxies, headers=headers, timeout=timeout)
    except Exception:
        # The request failed: drop the proxy that was used and retry with a
        # different one.
        try:
            self.proxies.remove(proxy)
            logging.info('Removed useless proxy {}'.format(proxy))
        except ValueError:
            pass
        return self.request(keyword_url)
def run():
    try:
        content = load_content()
        data = parse_content(content)
        publish(data)
    except Exception as e:
        logging.error(str(e))
def write_to_mongo(df, database, collection, incremental_run):
    """
    This method writes the data into MongoDB. If incremental_run is 1, the
    data is appended to the collection; otherwise it is overwritten.

    Parameters:
    -----------
    df (DataFrame): The dataframe to be written to MongoDB.
    database (string): The database in which we are going to write the data.
    collection (string): The collection in which we are going to write the data.
    incremental_run (int): Determines if data is overwritten or appended to the collection.
    """
    try:
        logging.info('Write to MongoDB in progress')
        write_mode = "overwrite"
        if incremental_run:
            write_mode = "append"
        df.write.format("mongo").mode(write_mode).option(
            "database", database).option("collection", collection).save()
        logging.info('Write to MongoDB completed successfully')
    except Exception as e:
        logging.error('Error in write_to_mongo() function: {0}'.format(e))
        raise e
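# A minimal usage sketch for write_to_mongo(), assuming a SparkSession created
# with the MongoDB Spark connector on the classpath. The connection URI, app
# name, and sample dataframe below are assumptions for illustration only.
import pyspark

spark = pyspark.sql.SparkSession.builder \
    .appName('mongo-writer') \
    .config('spark.mongodb.output.uri', 'mongodb://127.0.0.1/analytics') \
    .getOrCreate()
sample_df = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'value'])
write_to_mongo(sample_df, 'analytics', 'daily_counts', incremental_run=1)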
def get_states_from_graphml(filename: str):
    """
    Creates the state list from a .graphml file.
    :return: (states, min_coord, max_coord)
    """
    try:
        data = xmltodict.parse(open(filename).read())
    except FileNotFoundError:
        logging.error('File %s does not exist' % filename)
        return list(), 0, 0
    # get nodes from file
    flat_nodes = gr.get_flat_nodes(data)
    state_nodes = [node for node in flat_nodes
                   if gr.is_node_a_state(node) or gr.is_node_a_choice(node) or gr.is_node_a_group(node)]
    state_nodes.sort(key=lambda st: len(st['id']))
    gr.update_qroup_nodes(state_nodes)
    state_nodes.sort(key=gr.coord_sort)
    # get min and max coordinates and the height and width of the scheme
    coords = gr.get_minmax_coord(state_nodes)
    # create states from nodes, add internal triggers to the list of signals and all functions to the function list
    qm_states, player_signal = qm.create_states_from_nodes(state_nodes, coords, [], [])
    # get edges for external triggers
    flat_edges = gr.get_flat_edges(data)
    try:
        start, start_node, start_action = gr.get_start_node_data(flat_nodes, flat_edges)
    except ValueError:
        logging.error('UML-diagram %s does not have a start node' % filename)
        return list(), 0, 0
    # add external triggers and update the list of signals with them
    _ = qm.update_states_with_edges(qm_states, flat_edges, start, player_signal, coords[0], coords[1])
    return qm_states, coords[0], coords[1]
def httpResponse(url, method='POST', data=None, headers=None):
    response = None
    if headers is None:
        headers = {
            "Origin": "https://www.baidu.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) " +
                          "AppleWebKit/537.36 (KHTML, like Gecko) " +
                          "Chrome/34.0.1847.116 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded",
            "Referer": "https://www.baidu.com/login?forward=http://localhost",
            "Accept-Encoding": "gzip,deflate,sdch",
            "Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4",
            "Cookie": "sessionid=ubwzabfvvyy0ft4y4nk5qlduv7nswrim",
        }
    try:
        request = urllib2.Request(url, data, headers=headers,
                                  origin_req_host=None, unverifiable=False)
        request.get_method = lambda: method
        try:
            response = urllib2.urlopen(request)  # python <= 2.6
        except AttributeError as e:
            logging.debug(u'URLopen AttributeError: {msg}'.format(msg=e))
            response = urllib2.urlopen(
                request, context=ssl._create_unverified_context())  # python 2.7
    except urllib2.HTTPError as e:
        logging.error(u'The HTTP server ({url}) could not complete the request. Error code: {code}'.format(
            url=url, code=e.code), exc_info=True)
def read_from_cassandra(incremental_run, keyspace, table):
    """
    This method reads the data from Cassandra based on the incremental_run
    value. If incremental_run is 1, the read is limited to the running week
    and the current day's data is fetched from it. Otherwise, all the data is
    fetched from Cassandra.

    Parameters:
    -----------
    incremental_run (int): Determines how data is to be read.
    keyspace (string): Cassandra keyspace from which data is to be read.
    table (string): Cassandra table inside the keyspace from which data is to be read.

    Returns
    --------
    df (DataFrame): The dataframe obtained after reading from Cassandra
    """
    try:
        logging.info('Read from_cassandra in progress')
        column_names = ["event_time", "user_id"]
        if incremental_run:
            today = date.today()
            next_day = today + timedelta(days=1)
            # Current ISO year and week number, used in the pushed-down predicate.
            year, week_num, _ = today.isocalendar()
            today_starting_timestamp = datetime(today.year, today.month,
                                                today.day)
            next_day_starting_timestamp = datetime(next_day.year, next_day.month,
                                                   next_day.day)
            # Set condition to fetch the current day's data by pushing down the
            # predicate to reduce the number of entries retrieved from the database.
            incremental_condition = \
                (F.col("year") == year) & (F.col("week") == week_num) & \
                (F.col("event_time") >= today_starting_timestamp) & \
                (F.col("event_time") < next_day_starting_timestamp)
            df = spark.read.format("org.apache.spark.sql.cassandra")\
                .option("spark.cassandra.connection.port", "9042")\
                .option("keyspace", keyspace)\
                .option("table", table)\
                .load()\
                .select(column_names)\
                .where(incremental_condition)
        else:
            df = spark.read.format("org.apache.spark.sql.cassandra")\
                .option("spark.cassandra.connection.port", "9042")\
                .option("keyspace", keyspace)\
                .option("table", table)\
                .load()\
                .select(column_names)
        logging.info('Dataframe loaded successfully')
        return df
    except Exception as e:
        logging.error('Error in read_from_cassandra() function: {0}'.format(e))
        raise e
def log_error_and_upload_manifests_to_s3(error, elasticsearch_docs):
    logging.error("Exception caught while sending manifests to elasticsearch")
    logging.exception(error)
    logging.info("Uploading manifests to s3 fallback bucket")
    s3_client.put_object(
        Bucket=MANIFEST_FALLBACK_BUCKET,
        Key=os.path.join(f"s3-batch/manifests/{datetime.utcnow().strftime('%Y-%m-%d')}.json.gz"),
        Body=gzip.compress(json.dumps(elasticsearch_docs).encode("utf-8")),
        ACL="private")
def main(filenames: Union[List[str], str]):
    qm_model, qm_package = cr.prepare_qm()
    player_signal = list()
    event_fields = dict()
    ctor_fields = dict()
    ctor_code = ""
    cppcode = ""
    hcode = ""
    if not isinstance(filenames, list):
        filenames = [filenames]
    modelnames: List[str] = list()
    for filename in filenames:
        try:
            data = xmltodict.parse(open(filename).read())
            modelname = os.path.basename(filename)
            modelname = modelname.split('.')[0]
            modelname = modelname[0].lower() + modelname[1:]
            modelnames.append(modelname)
        except FileNotFoundError:
            logging.error('File %s does not exist' % filename)
            continue
        # get nodes from file
        flat_nodes = gr.get_flat_nodes(data)
        state_nodes = [node for node in flat_nodes
                       if gr.is_node_a_state(node) or gr.is_node_a_choice(node) or gr.is_node_a_group(node)]
        state_nodes.sort(key=lambda st: len(st['id']))
        gr.update_qroup_nodes(state_nodes)
        state_nodes.sort(key=gr.coord_sort)
        # get min and max coordinates and the height and width of the scheme
        coords = gr.get_minmax_coord(state_nodes)
        # create states from nodes, add internal triggers to the list of signals and all functions to the function list
        functions: List[str] = list()
        qm_states, player_signal = qm.create_states_from_nodes(state_nodes, coords, player_signal, functions)
        # get edges for external triggers
        flat_edges = gr.get_flat_edges(data)
        try:
            start, start_node, start_action = gr.get_start_node_data(flat_nodes, flat_edges)
        except ValueError:
            logging.error('UML-diagram %s does not have a start node' % filename)
            continue
        # add external triggers and update the list of signals with them
        player_signal = qm.update_states_with_edges(qm_states, flat_edges, start, player_signal, coords[0], coords[1])
        # get notes
        notes = [node for node in flat_nodes if gr.is_node_a_note(node)]
        # create qm data
        event_fields, hcode, cppcode, ctor_code, ctor_fields = cr.create_qm(
            qm_package, modelname, start_node, start_action, notes, qm_states, coords)
    # create file with final code
    try:
        cr.finish_qm(qm_model, qm_package, os.path.splitext(filenames[0])[0], modelnames, player_signal,
                     event_fields, hcode, cppcode, ctor_code, ctor_fields)
    except PermissionError:
        logging.fatal("File already exists and is locked")
    service_files.create_files(os.path.dirname(filenames[0]), player_signal, modelname, functions)
def force_bulk(self, bulk=False):
    # Flush either when explicitly forced or when the buffered actions have
    # reached the configured batch size.
    if bulk or len(self.bulker) >= self.batchSize:
        success, errors = helpers.bulk(
            self.esConn, self.bulker.pop_all(),
            chunk_size=self.batchSize)  # @UnusedVariable
        if errors:
            logging.error("Force bulk: there are some errors %s", errors)
            return False
    return True
def read_from_cassandra(incremental_run, keyspace, table):
    """
    This method reads the data from Cassandra based on the incremental_run
    value. If incremental_run is 1, the read is limited to the running week
    and the current day's data is fetched from it. Otherwise, all the data is
    fetched from Cassandra.

    Parameters:
    -----------
    incremental_run (int): Determines how data is to be read.
    keyspace (string): Cassandra keyspace from which data is to be read.
    table (string): Cassandra table inside the keyspace from which data is to be read.

    Returns
    --------
    df (DataFrame): The dataframe obtained after reading from Cassandra
    """
    try:
        logging.info('Read from_cassandra in progress')
        column_names = ["event_time", "user_id"]
        if incremental_run:
            # Get the current ISO week number
            today_date = datetime.date.today()
            year, week_num, day_of_week = today_date.isocalendar()
            # Set condition to fetch the current week's data by pushing down the
            # predicate to reduce the number of entries retrieved from the database.
            incremental_condition = (F.col("year") == year) & (F.col("week") == week_num)
            # Get the current week's data from Cassandra
            df = spark.read.format("org.apache.spark.sql.cassandra")\
                .option("keyspace", keyspace)\
                .option("table", table)\
                .load()\
                .select(column_names)\
                .where(incremental_condition)
            # Filter and fetch the current day's data
            df = df.filter(F.dayofmonth(df.event_time) == today_date.day)
        else:
            # Read the entire table from Cassandra
            df = spark.read.format("org.apache.spark.sql.cassandra")\
                .option("spark.cassandra.connection.port", "9042")\
                .option("keyspace", keyspace)\
                .option("table", table)\
                .load()\
                .select(column_names)
        logging.info('Dataframe loaded successfully')
        return df
    except Exception as e:
        logging.error('Error in read_from_cassandra() function: {0}'.format(e))
        raise e
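# A minimal sketch of how the `spark` session used by read_from_cassandra()
# might be configured, assuming the Spark Cassandra Connector is available.
# The host, connector version, and app name below are assumptions, not taken
# from the original script.
import pyspark

spark = pyspark.sql.SparkSession.builder \
    .appName('cassandra-reader') \
    .config('spark.jars.packages',
            'com.datastax.spark:spark-cassandra-connector_2.12:3.1.0') \
    .config('spark.cassandra.connection.host', '127.0.0.1') \
    .config('spark.cassandra.connection.port', '9042') \
    .getOrCreate()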
def upsert_batch(self, indexName, indexType, docs, batchSize=1000, idField=None):
    actions = self._buildIndexActions(indexName, indexType, docs, idField)
    success, errors = helpers.bulk(self.esConn, actions, chunk_size=batchSize)  # @UnusedVariable
    if errors:
        logging.error("Upsert batch: there are some errors %s", errors)
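# The `_buildIndexActions` helper is not shown; a minimal sketch of what it
# might return, assuming the elasticsearch-py bulk helper action format (one
# dict per document with `_index`, `_type`, `_id` and `_source` keys). The
# method body below is an assumption, not the original implementation.
def _buildIndexActions(self, indexName, indexType, docs, idField=None):
    actions = []
    for doc in docs:
        action = {
            "_op_type": "index",  # index (insert or overwrite) the document
            "_index": indexName,
            "_type": indexType,
            "_source": doc,
        }
        if idField is not None and idField in doc:
            action["_id"] = doc[idField]
        actions.append(action)
    return actions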
def get_opened_files(log_errors=True):
    for pid in psutil.pids():
        try:
            yield (file[0] for file in psutil.Process(pid).open_files())
        except psutil.AccessDenied as e:
            if log_errors:
                logging.error("Access denied while getting process opened files")
                logging.exception(e)
        except psutil.NoSuchProcess:
            logging.debug("Process no longer exists")
        except Exception as e:
            if log_errors:
                logging.exception(e)
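# A small usage sketch for get_opened_files(): each yielded item is itself a
# generator of open file paths for one process, so it is consumed with a
# nested loop. Purely illustrative.
for per_process_files in get_opened_files(log_errors=False):
    for open_file_path in per_process_files:
        print(open_file_path)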
def get(self, path: str) -> str:
    """
    Downloads a file and returns its contents as a string. Objects that are
    larger than available memory cannot be loaded via `get`.
    """
    parsed_path = parse_path(path)
    try:
        data = self.client.get_object(Bucket=parsed_path["bucket"],
                                      Key=parsed_path["key"])["Body"]
        return data.read().decode("unicode-escape")
    except ClientError as err:
        self.error = err
        if err.response["Error"]["Code"] == "NoSuchKey":
            log.error("S3 file %s does not exist", path)
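# `parse_path` is referenced but not shown; a minimal sketch of what it might
# look like, assuming s3://bucket/key style paths. The implementation below is
# an assumption for illustration only.
from urllib.parse import urlparse

def parse_path(path: str) -> dict:
    """Splits an 's3://bucket/key' path into its bucket and key parts."""
    parsed = urlparse(path)
    return {"bucket": parsed.netloc, "key": parsed.path.lstrip("/")}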
def process_request(command):
    pidlist = []
    for proc in process_iter():
        if re.match(command, proc.name()):
            pidlist.append(proc.pid)
    for pid in pidlist:
        process = Process(pid)
        try:
            ios = process.io_counters()
            for iotype in ios._fields:
                IO_PROCESS.labels(io_type=iotype, pid=pid, cmd=process.name()).set(getattr(ios, iotype))
        except AccessDenied:
            logging.error("unable to access PID %s stats" % pid)
    return IO_PROCESS
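# IO_PROCESS is not defined in this snippet; it behaves like a
# prometheus_client Gauge with io_type/pid/cmd labels, so a plausible
# definition might look like the sketch below. The metric name and help text
# are assumptions, as is grouping the psutil imports here.
from prometheus_client import Gauge
from psutil import AccessDenied, Process, process_iter

IO_PROCESS = Gauge('process_io_bytes',
                   'Per-process I/O counters grouped by counter type',
                   ['io_type', 'pid', 'cmd'])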
def multiHttpResponse(url, method='POST', data=None, count=5, sleep=5):
    '''
    HTTP GET/POST data with retries on failure (by default retry 5 times with a 5 second delay).
    '''
    num = 0
    while count:
        num += 1
        logging.info(u'{method} attempt number {num}.'.format(method=method, num=num))
        code, result = httpResponse(url, method, data)
        if code == 200 or result != 'error':
            return code, result
        else:
            time.sleep(sleep)
            count -= 1
    logging.error(u'Tried {num} times and still failed; giving up.'.format(num=num))
    return False, 'error'
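# A minimal usage sketch for multiHttpResponse(); the URL and retry settings
# below are hypothetical. On success it returns the (code, result) pair from
# httpResponse(), otherwise (False, 'error') after exhausting the retries.
code, result = multiHttpResponse('https://www.baidu.com/api/ping',
                                 method='GET', count=3, sleep=2)
if code is False:
    logging.error('All retries failed')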
def main(filenames):
    # filenames = ['ka-tet', 'prioritizer2', 'location', 'emotion', 'dogan_ligt', 'reason_handler', 'dogan1',
    #              'lightsaber']
    # filenames = ["ka_tet_counter", "ka_tet", "character"]
    qm_model, qm_package = cr.prepare_qm()
    player_signal = []
    for filename in filenames:
        try:
            data = xmltodict.parse(open(filename + '.graphml').read())
        except FileNotFoundError:
            logging.error('File %s.graphml does not exist' % filename)
            continue
        # get nodes from file
        flat_nodes = gr.get_flat_nodes(data)
        state_nodes = [node for node in flat_nodes
                       if gr.is_node_a_state(node) or gr.is_node_a_choice(node) or gr.is_node_a_group(node)]
        gr.update_qroup_nodes(state_nodes)
        state_nodes.sort(key=gr.coord_sort)
        # get min and max coordinates and the height and width of the scheme
        coords = gr.get_minmax_coord(state_nodes)
        # create states from nodes and add internal triggers to the list of signals
        qm_states, player_signal = qm.create_states_from_nodes(state_nodes, coords, player_signal)
        # get edges for external triggers
        flat_edges = gr.get_flat_edges(data)
        try:
            start, start_node, start_action = gr.get_start_node_data(flat_nodes, flat_edges)
        except ValueError:
            logging.error('UML-diagram %s.graphml does not have start node' % filename)
            continue
        # add external triggers and update the list of signals with them
        player_signal = qm.update_states_with_edges(qm_states, flat_edges, start, player_signal, coords[0], coords[1])
        # get notes
        notes = [node for node in flat_nodes if gr.is_node_a_note(node)]
        # create qm data
        event_fields = cr.create_qm(qm_package, filename, start_node, start_action, notes,
                                    qm_states, coords, player_signal)
    # create file with final code
    try:
        cr.finish_qm(qm_model, qm_package, filenames, player_signal, event_fields)
    except PermissionError:
        logging.fatal("File already exists and is locked")
def get_user_count_by_day(cached_df):
    """
    This method finds out the daily user count using the platform.

    Parameters:
    -----------
    df (DataFrame): The dataframe obtained after reading from Cassandra

    Returns
    --------
    result (DataFrame): The dataframe with the daily user count.
    """
    try:
        logging.info('Getting user count by day in progress')
        cached_df.createOrReplaceTempView('cached_df')
        user_count_per_day_df = \
            spark.sql('''
                with grouped_user_by_day AS
                (
                    SELECT user_id, DATE(event_time) as date1
                    FROM cached_df
                    GROUP BY user_id, DATE(event_time)
                )
                SELECT date1, COUNT(1)
                FROM grouped_user_by_day
                GROUP BY date1
            ''')
        ## result = result.withColumn("day", result["day"].cast(StringType()))
        ## result = result.groupBy("year", "month").agg(
        ##     F.map_from_entries(
        ##         F.collect_list(
        ##             F.struct("day", "count"))).alias("user_count"))
        ## return result
        logging.info('Got user count by day successfully')
        return user_count_per_day_df
    except Exception as e:
        logging.error(
            'Error in get_user_count_by_day() function: {0}'.format(e))
        raise e
def restore_from_dir(openshift_client, directory, resources):
    for resource_kind in os.listdir(directory):
        print(resources)
        if resource_kind in resources:
            resource_kind_dir = directory + '/' + resource_kind
            for single_resource in os.listdir(resource_kind_dir):
                full_path = '{0}/{1}'.format(resource_kind, single_resource)
                log.info('Restoring {}'.format(full_path))
                try:
                    with open(resource_kind_dir + '/' + single_resource, 'r') as f:
                        resource_yaml = yaml.safe_load(f)
                        openshift_client.create_resource(resource_kind, resource_yaml,
                                                         args.restore_project_name)
                except ApiException as err:
                    log.error('Unable to restore {0}'.format(full_path))
                    log.debug(err)
def main(filenames: Union[List[str], str]):
    player_signal = list()
    if not isinstance(filenames, list):
        filenames = [filenames]
    modelnames: List[str] = list()
    for filename in filenames:
        try:
            data = xmltodict.parse(open(filename).read())
            modelname = os.path.basename(filename)
            modelname = modelname.split('.')[0]
            modelname = modelname[0].lower() + modelname[1:]
            modelnames.append(modelname)
        except FileNotFoundError:
            logging.error('File %s does not exist' % filename)
            continue
        # get nodes from file
        flat_nodes = gr.get_flat_nodes(data)
        state_nodes = [node for node in flat_nodes
                       if gr.is_node_a_state(node) or gr.is_node_a_choice(node) or gr.is_node_a_group(node)]
        state_nodes.sort(key=lambda st: len(st['id']))
        gr.update_qroup_nodes(state_nodes)
        state_nodes.sort(key=gr.coord_sort)
        # get min and max coordinates and the height and width of the scheme
        coords = gr.get_minmax_coord(state_nodes)
        # create states from nodes, add internal triggers to the list of signals and all functions to the function list
        functions: List[str] = list()
        qm_states, player_signal = qm.create_states_from_nodes(state_nodes, coords, player_signal, functions)
        # get edges for external triggers
        flat_edges = gr.get_flat_edges(data)
        try:
            start, start_node, start_action = gr.get_start_node_data(flat_nodes, flat_edges)
        except ValueError:
            logging.error('UML-diagram %s does not have a start node' % filename)
            continue
        # add external triggers and update the list of signals with them
        player_signal = qm.update_states_with_edges(qm_states, flat_edges, start, player_signal, coords[0], coords[1])
        # get notes
        notes = [node for node in flat_nodes if gr.is_node_a_note(node)]
        # TODO(aeremin) Extract to separate file.
        CppFileWriter(modelname, start_node, start_action, qm_states, notes,
                      player_signal).write_to_file(os.path.dirname(filename))
    service_files.create_files(os.path.dirname(filenames[0]), player_signal, modelname, functions)
def get_user_count_by_hour(df):
    """
    This method finds out the hourly user count using the platform.

    Parameters:
    -----------
    df (DataFrame): The dataframe obtained after reading from Cassandra

    Returns
    --------
    result (DataFrame): The dataframe with the hourly user count.
    """
    try:
        logging.info('Getting user count by hour in progress')
        df.createOrReplaceTempView('df')
        result = \
            spark.sql('''
                with cte1 AS
                (
                    SELECT DATE(event_time) as date1, HOUR(event_time) as hour, 1 as count
                    FROM df
                    GROUP BY user_id, DATE(event_time), HOUR(event_time)
                )
                SELECT to_timestamp(CONCAT(cast(date1 as string), "/", cast(hour as string), ":00:00"),
                                    "yyyy-MM-dd/HH:mm:ss") as date,
                       YEAR(date1) as year,
                       MONTH(date1) as month,
                       DAY(date1) as day,
                       hour,
                       SUM(count) as count
                FROM cte1
                GROUP BY date1, hour
            ''')
        ## result = result.withColumn("day", result["day"].cast(StringType()))
        ## result = result.groupBy("year", "month").agg(
        ##     F.map_from_entries(
        ##         F.collect_list(
        ##             F.struct("day", "count"))).alias("user_count"))
        ## return result
        logging.info('Got user count by hour successfully')
        return result
    except Exception as e:
        logging.error(
            'Error in get_user_count_by_hour() function: {0}'.format(e))
        raise e
def list(self, path: str) -> List[str]:
    """
    Lists all objects at the given S3 path and returns them in a list. Each
    element of the returned list is a fully-qualified S3 path (i.e. it could
    be passed to other s3_manager functions).
    """
    objects = []
    parsed_path = parse_path(path)
    try:
        keys = self.client.list_objects_v2(
            Bucket=parsed_path["bucket"],
            Prefix=parsed_path["key"])["Contents"]
        objects.extend([
            "s3://" + parsed_path["bucket"] + "/" + k["Key"] for k in keys
        ])
    except ClientError as err:
        self.error = err
        if err.response["Error"]["Code"] == "NoSuchKey":
            log.error("S3 file %s does not exist", path)
    return objects
def get_weather():
    url = "https://devapi.heweather.net/v7/weather/24h"
    key = global_config.get('config', 'key')
    location = global_config.get('config', 'location')
    # retry up to 5 times on failure
    count = 0
    while count < 5:
        try:
            count += 1
            result = requests.get(url=url,
                                  params={
                                      'location': location,
                                      'key': key
                                  },
                                  timeout=(3, 1))
            if result:
                return get_result(result)
        except Exception as e:
            logging.error(f'retry,{count}, {e}')
            time.sleep(1)
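# `global_config` and `get_result` are defined elsewhere in the project; a
# plausible sketch of the configparser setup that the global_config.get(
# 'config', ...) calls suggest. The file name below is an assumption.
import configparser

global_config = configparser.ConfigParser()
global_config.read('config.ini', encoding='utf-8')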
def main(filenames):
    for filename in filenames:
        try:
            data = xmltodict.parse(open(filename + '.graphml').read())
        except FileNotFoundError:
            logging.error('File %s.graphml does not exist' % filename)
            continue
        flat_nodes = gr.get_flat_nodes(data)
        state_nodes = [
            node for node in flat_nodes
            if gr.is_node_a_state(node) or gr.is_node_a_choice(node) or gr.is_node_a_group(node)
        ]
        gr.update_qroup_nodes(state_nodes)
        states = makexml.create_states_from_nodes(state_nodes)
        flat_edges = gr.get_flat_edges(data)
        try:
            start, start_action = gr.get_start_node_action(
                flat_nodes, flat_edges)
        except ValueError:
            logging.error('UML-diagram %s.graphml does not have start node' % filename)
            continue
        makexml.update_states_with_edges(states, flat_edges, start)
        makexml.createxml(filename, states, start_action)
def replace_image(job_id, file_name, html_string, bucket_name, bucket_folder='content/'):
    # parse the html and collect the image sources
    images = set(re.findall("src='([^']+)'", html_string))
    logging.info("[IMG] Start analyzing html for job %s in file %s", job_id, file_name)
    # loop over all images in the html: upload each image to our bucket and replace its src
    for image in images:
        image_src = image.strip()
        # ignore images that were not uploaded to the hackpad s3 bucket
        if not image_src.startswith(
                'https://hackpad-attachments.s3.amazonaws.com/'):
            continue
        logging.info("[IMG] Processing image %s" % image_src)
        # get image mime_type
        mime_type_info = mimetypes.guess_type(image_src)
        mime_type = mime_type_info[0] if mime_type_info[0] else 'image/jpeg'
        # construct expire and cache_control headers
        days = 100
        cache_control = 'max-age= %d' % (60 * 60 * 24 * days)
        expires = datetime.utcnow() + timedelta(days=days)
        expires = expires.strftime("%a, %d %b %Y %H:%M:%S GMT")
        try:
            logging.info("[IMG] First try for image %s", image_src)
            # get image name
            image_url_parts = image_src.split('/')
            image_name = image_url_parts
            # read image url
            image_src_parsed = urllib.parse.urlparse(image_src)
            image_name_encoded = urllib.parse.quote(image_src_parsed.path)
            file = io.BytesIO(
                urllib.request.urlopen(
                    urllib.parse.urljoin(image_src, image_name_encoded)).read())
            img = Image.open(file, mode='r')
        except urllib.error.HTTPError as error:
            logging.warning(
                "[IMG] First try block resulted in urllib.error.HTTPError: %s" % error)
            try:
                logging.info("[IMG] retry for image %s", image_src)
                file = io.BytesIO(urllib.request.urlopen(image_src).read())
                img = Image.open(file, mode='r')
            except urllib.error.HTTPError as error:
                logging.error("[IMG] %s", error.read())
                continue
        except UnicodeEncodeError:
            logging.error("[IMG] UnicodeEncodeError for image %s", image_src)
            continue
        # get the image extension
        image_parts = image_src_parsed.path.split('.')
        image_extension = 'JPEG' if image_parts[-1].upper() == 'JPG' else image_parts[-1]
        # hack for weird image URLs
        if len(image_extension) > 4:
            image_extension = 'png'
        # stream file in binary mode
        imgByteArr = io.BytesIO()
        img.save(imgByteArr, format=image_extension.upper())
        imgByteArr = imgByteArr.getvalue()
        # upload image to our bucket, but first check if it already exists
        exists = False
        try:
            s3.Object(bucket_name, bucket_folder + image_name[-1]).load()
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                exists = False
        else:
            exists = True
        if exists:
            logging.info("[IMG] Skipping upload: %s already exists" % image_src)
        else:
            logging.info("[IMG] Uploading %s" % image_src)
            s3.Bucket(bucket_name).put_object(Key=bucket_folder + image_name[-1],
                                              Body=imgByteArr,
                                              ACL='public-read',
                                              ContentType=mime_type,
                                              CacheControl=cache_control,
                                              Expires=expires)
        logging.info("[IMG] Replace %s with %s" % (
            image_src,
            'https://s3-eu-west-1.amazonaws.com/' + bucket_name + '/' + bucket_folder + image_name[-1]))
        # replace the src of the image with the new uploaded location
        html_string = html_string.replace(
            image_src,
            'https://s3-eu-west-1.amazonaws.com/' + bucket_name + '/' + bucket_folder + image_name[-1])
        logging.info("[IMG] Replaced with %s", image_src)
    logging.info("[IMG] Finished analyzing html for job %s in file %s", job_id, file_name)
    return html_string
    project_id = json.load(f)['project_id']
    bucket = storage_client.bucket('tyeoh-streetcred', user_project=project_id)
    metadata_dir = os.path.join(app_dir, 'metadata')
    if not os.path.isdir(metadata_dir):
        os.makedirs(metadata_dir)
        logging.info('Created %s' % metadata_dir)
    for api in apis:
        logging.info('Loading data for %s' % api)
        request_date = datetime.now(
            tz=pytz.timezone('Singapore')).strftime('%Y%m%d')
        df = generate_table(api)
        dest_path = os.path.join(
            metadata_dir, '%s_%s_metadata.csv.xz' % (request_date, api))
        df.to_csv(dest_path, index=False, header=True, compression='xz')
        logging.info('Saved data to %s' % dest_path)
        upload_blob(
            bucket, dest_path,
            '%s_metadata/%s_%s_metadata.csv.xz' % (api, request_date, api))
        os.remove(dest_path)
        logging.info('Deleted %s' % dest_path)
except Exception as e:
    logging.error("Exception occurred", exc_info=True)
    raise
else:
    logging.info('Script complete')
def delete_batch(self, indexName, indexType, docids):
    actions = self._buildDeleteActions(indexName, indexType, docids)
    success, errors = helpers.bulk(self.esConn, actions)  # @UnusedVariable
    if errors:
        logging.error("Delete batch: there are some errors %s", errors)
logger.setLevel(logging.INFO)

# Parse the arguments provided from the command line.
parser = argparse.ArgumentParser()
parser.add_argument("--cass_keyspace", help="keyspace")
parser.add_argument("--cass_table", help="table")
parser.add_argument("--mongo_db", help="Mongo db")
parser.add_argument("--mongo_collection", help="Mongo collection")
parser.add_argument("--incremental_run", help="Full table load or incremental run")
args = parser.parse_args()
if not (args.cass_keyspace and args.cass_table and args.mongo_db and
        args.mongo_collection and args.incremental_run):
    logging.error(
        "Command line arguments are missing. Possibly --cass_keyspace --cass_table --mongo_db --mongo_collection --incremental_run"
    )
    sys.exit()
if args.incremental_run not in ['0', '1']:
    logging.error("Incremental run should be either 0 or 1")
    sys.exit()
incremental_run = int(args.incremental_run)
logging.info("Argument parsed successfully")

# Spawn the spark session
spark = pyspark.sql.SparkSession.builder\
    .appName('test-mongo')\
    .master('local[*]')\
    .getOrCreate()

df = read_from_cassandra(incremental_run, args.cass_keyspace,
def __init__(self):
    args = parse_args()
    print 'Arguments: %s' % args

    self.name = "microdrop.app"

    # get the version number
    self.version = ""
    try:
        raise Exception  # skip straight to the pkg_resources fallback below
        version = subprocess.Popen(['git', 'describe'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   stdin=subprocess.PIPE).communicate()[0].rstrip()
        m = re.match(r'v(\d+)\.(\d+)-(\d+)', version)
        self.version = "%s.%s.%s" % (m.group(1), m.group(2), m.group(3))
        branch = subprocess.Popen(['git', 'rev-parse', '--abbrev-ref', 'HEAD'],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  stdin=subprocess.PIPE).communicate()[0].rstrip()
        if branch.strip() != 'master':
            self.version += "-%s" % branch
    except:
        import pkg_resources
        version = pkg_resources.get_distribution('microdrop').version
        dev = ('dev' in version)
        self.version = re.sub(r'\.dev.*', '', re.sub('post', '', version))
        if dev:
            self.version += "-dev"

    self.realtime_mode = False
    self.running = False
    self.builder = gtk.Builder()
    self.signals = {}
    self.plugin_data = {}

    # these members are initialized by plugins
    self.experiment_log_controller = None
    self.config_controller = None
    self.dmf_device_controller = None
    self.protocol_controller = None
    self.main_window_controller = None

    # Enable custom logging handler
    logger.addHandler(CustomHandler())
    self.log_file_handler = None

    # config model
    try:
        self.config = Config(args.config)
    except IOError:
        logging.error('Could not read configuration file, `%s`. Make sure'
                      ' it exists and is readable.', args.config)
        raise SystemExit(-1)

    # set the log level
    if self.name in self.config.data and ('log_level' in self.config.data[self.name]):
        self._set_log_level(self.config.data[self.name]['log_level'])
    logger.info('Microdrop version: %s', self.version)
    logger.info('Running in working directory: %s', os.getcwd())

    # Run post install hooks for freshly installed plugins. It is necessary to
    # delay the execution of these hooks here due to Windows file locking
    # preventing the deletion of files that are in use.
    post_install_queue_path = \
        path(self.config.data['plugins']['directory']) \
        .joinpath('post_install_queue.yml')
    if post_install_queue_path.isfile():
        post_install_queue = yaml.load(post_install_queue_path.bytes())
        post_install_queue = map(path, post_install_queue)

        logger.info('[App] processing post install hooks.')
        for p in post_install_queue:
            try:
                info = get_plugin_info(p)
                logger.info("  running post install hook for %s" % info.plugin_name)
                plugin_manager.post_install(p)
            finally:
                post_install_queue.remove(p)
                post_install_queue_path.write_bytes(yaml.dump(post_install_queue))

    # Delete paths that were marked during the uninstallation of a plugin.
    # It is necessary to delay the deletion until here due to Windows file
    # locking preventing the deletion of files that are in use.
    deletions_path = path(self.config.data['plugins']['directory'])\
        .joinpath('requested_deletions.yml')
    if deletions_path.isfile():
        requested_deletions = yaml.load(deletions_path.bytes())
        requested_deletions = map(path, requested_deletions)

        logger.info('[App] processing requested deletions.')
        for p in requested_deletions:
            try:
                if p != p.abspath():
                    logger.info('  (warning) ignoring path %s since it '
                                'is not absolute' % p)
                    continue
                if p.isdir():
                    info = get_plugin_info(p)
                    if info:
                        logger.info('  deleting %s' % p)
                        cwd = os.getcwd()
                        os.chdir(p.parent)
                        try:
                            path(p.name).rmtree()  # ignore_errors=True
                        except Exception as why:
                            logger.warning('Error deleting path %s (%s)'
                                           % (p, why))
                            raise
                        os.chdir(cwd)
                        requested_deletions.remove(p)
                else:
                    # if the directory doesn't exist, remove it from the list
                    requested_deletions.remove(p)
            except (AssertionError,):
                logger.info('  NOT deleting %s' % (p))
                continue
def catch_exception(session, e):
    logging.error("DbConnectorRetrying error. Catch exception with traceback")
    logging.exception(e)
    session.rollback()
def parse_env():
    args = SimpleNamespace(**{})
    try:
        args.WORKING_DIR = os.environ['WORKING_DIR']
    except KeyError:
        args.WORKING_DIR = '.'
    args.BACKUP_GIT_WORKING_DIR = args.WORKING_DIR + '/backup'
    args.SECRET_GIT_WORKING_DIR = args.WORKING_DIR + '/secret'
    args.temp_ssh_file = None
    args.temp_cert_file = None
    try:
        args.GIT_SSH_PRIVATE_KEY_LOC = os.environ['GIT_SSH_PRIVATE_KEY_LOC']
    except KeyError:
        try:
            private_key = os.environ['GIT_SSH_PRIVATE_KEY']
            args.GIT_SSH_PRIVATE_KEY_LOC = args.temp_ssh_file = args.WORKING_DIR + '/ssh_key'
            f = open(args.GIT_SSH_PRIVATE_KEY_LOC, 'w')
            f.write(private_key)
            f.close()
            os.chmod(args.GIT_SSH_PRIVATE_KEY_LOC, 0o600)
        except KeyError:
            log.error(
                'Either GIT_SSH_PRIVATE_KEY_LOC or GIT_SSH_PRIVATE_KEY environment variable must be set.'
            )
            exit(1)
    try:
        args.LOG_LEVEL = os.environ['LOG_LEVEL']
    except KeyError:
        args.LOG_LEVEL = 'WARNING'
    try:
        args.BACKUP_GIT_REPO = os.environ['BACKUP_GIT_REPO']
    except KeyError:
        log.error('BACKUP_GIT_REPO environment variable must be set.')
        exit(1)
    try:
        args.SECRET_GIT_REPO = os.environ['SECRET_GIT_REPO']
    except KeyError:
        log.error('SECRET_GIT_REPO environment variable must be set.')
        exit(1)
    try:
        args.KUBERNETES_SERVICE_HOST = os.environ['KUBERNETES_SERVICE_HOST']
    except KeyError:
        log.error('KUBERNETES_SERVICE_HOST environment variable must be set.')
        exit(1)
    try:
        args.KUBERNETES_SERVICE_PORT = os.environ['KUBERNETES_SERVICE_PORT']
    except KeyError:
        log.error('KUBERNETES_SERVICE_PORT environment variable must be set.')
        exit(1)
    try:
        args.KUBERNETES_TOKEN = os.environ['KUBERNETES_TOKEN']
    except KeyError:
        log.error('KUBERNETES_TOKEN environment variable must be set.')
        exit(1)
    try:
        args.SERVICE_CERT_FILENAME = os.environ['SERVICE_CERT_FILENAME']
    except KeyError:
        try:
            service_cert = os.environ['SERVICE_CERT']
            args.SERVICE_CERT_FILENAME = args.temp_cert_file = args.WORKING_DIR + '/ca.crt'
            f = open(args.SERVICE_CERT_FILENAME, 'w')
            f.write(service_cert)
            f.close()
            os.chmod(args.SERVICE_CERT_FILENAME, 0o600)
        except KeyError:
            log.error(
                'Either SERVICE_CERT_FILENAME or SERVICE_CERT environment variable must be set.'
            )
            exit(1)
    return args
def download_image(source, destination):
    try:
        urllib.request.urlretrieve(source, destination)
    except urllib.error.URLError as e:
        logging.error(e)
        raise