def get_vbb_data(centre):
    global stations
    global station_types
    g = Graph()

    with open('nodes.ndjson') as f:
        dataSta = ndjson.load(f)

    # convert to and from objects
    textSta = ndjson.dumps(dataSta)
    dataSta = ndjson.loads(textSta)

    for i in dataSta:
        # tupel = str(i['metadata']['x'])+","+str(i['metadata']['y'])
        x = float(i['metadata']['longitude'])
        y = float(i['metadata']['latitude'])
        idSt = str(i['id'])
        g.add_node(idSt)
        stations[idSt] = (x, y)
        # g.add_node(tupel)

    with open('edges.ndjson') as f:
        dataDist = ndjson.load(f)

    # convert to and from objects
    textDist = ndjson.dumps(dataDist)
    dataDist = ndjson.loads(textDist)

    # station_types encoding: 1 = regional rail, 2 = U-/S-Bahn, 3 = everything else
    # (the smallest value ever assigned to a station is kept)
    for i in dataDist:
        stationA = str(i['source'])
        stationB = str(i['target'])
        distance = int(i['metadata']['time'])
        line = i['metadata']['line']
        if line.startswith('RB'):  # regional lines
            station_types[stationA] = 1
            station_types[stationB] = 1
        elif line.startswith('U') or line.startswith('S'):
            if stationA in station_types:
                if station_types[stationA] > 1:
                    station_types[stationA] = 2
            else:
                station_types[stationA] = 2
            if stationB in station_types:
                if station_types[stationB] > 1:
                    station_types[stationB] = 2
            else:
                station_types[stationB] = 2
        else:
            if stationA in station_types:
                if station_types[stationA] > 2:
                    station_types[stationA] = 3
            else:
                station_types[stationA] = 3
            if stationB in station_types:
                if station_types[stationB] > 2:
                    station_types[stationB] = 3
            else:
                station_types[stationB] = 3
        g.add_edge(stationA, stationB, distance)

    return dijsktra(g, centre)  # Station name of Dabendorf node: 900000245024
def get_json(username, game_mode="bullet", update=True, ensure_complete=False):
    json_file_path = f'data\\lichess_{username}_{game_mode}.json'
    url = f'https://lichess.org/api/games/user/{username}'
    headers = {'Accept': 'application/x-ndjson'}
    parameters = {'rated': 'true', 'perfType': game_mode, 'max': 500}

    json_file = Path(json_file_path)
    if not json_file.is_file():
        print(f"File {json_file_path} not found, downloading...")
        r = requests.get(url, headers=headers, params=parameters)
        print("Download complete.")
        ensure_complete = True
        with open(json_file_path, 'w') as f:
            json_games = ndjson.loads(r.text)
            ndjson.dump(json_games, f)
    else:
        with open(json_file, 'r') as file:
            json_games = ndjson.loads(file.read())

    if ensure_complete:
        until = json_games[-1]['createdAt']
        parameters['until'] = until
        old_games = True
        while old_games:
            until_date = datetime.fromtimestamp(until / 1000)
            print(f"Checking games before {until_date:%d/%m/%y %H:%M}...")
            r = requests.get(url, headers=headers, params=parameters)
            old_games = ndjson.loads(r.text)
            if old_games:
                until = old_games[-1]['createdAt']
                parameters['until'] = until
                print(f'Found {len(old_games)} older games.')
                json_games += old_games
                with open(json_file_path, 'a') as f:
                    f.write('\n')
                    ndjson.dump(old_games, f)
            else:
                print('No older games found')

    if not update:
        return json_games

    since = json_games[0]['createdAt']
    parameters['since'] = since
    del parameters['max']
    since_date = datetime.fromtimestamp(since / 1000)
    print(f"Checking games after {since_date:%d/%m/%y %H:%M}...")
    r = requests.get(url, headers=headers, params=parameters)
    new_games = ndjson.loads(r.text)  # decode the response body as text, as above
    if new_games:
        print(f'Found {len(new_games)} new games')
        with open(json_file_path, 'w') as f:
            ndjson.dump(new_games + json_games, f)
    else:
        print('No newer games found')
    return new_games + json_games
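# Usage sketch (not part of the original source). 'someuser' is a placeholder
# Lichess username; a local data\ directory is assumed to exist, and requests,
# ndjson, Path and datetime are assumed to be imported alongside get_json above.
games = get_json('someuser', game_mode='bullet', update=True)
print(f'{len(games)} rated bullet games cached locally.')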
def handler(event, context):
    s3 = boto3.resource('s3')
    obj = s3.Object("abalustre-btc-contracts",
                    event["Records"][0]['s3']['object']['key'])
    body = obj.get()["Body"].read().decode("utf-8")

    es = Elasticsearch(
        cloud_id=os.environ["ELASTICID"],
        http_auth=(os.environ["ELASTICUSER"], os.environ["ELASTICPASSWORD"]))

    read_lines = read_file_lines(body)  # read file into list of lines
    tg_list = target_and_group_list()
    g_list = segmenta_imbarq(lines=read_lines, target_group_list=tg_list)
    btc_bulk = g_list[6]
    btc_obj_list = build_btc_contract_info(btc_bulk)

    btc_contract_list = []
    for i in range(len(btc_obj_list)):
        obj = btc_obj_list[i]
        contract_id = obj["contract-number"]
        position_date = obj["position-date"].strftime("%Y-%m-%d")
        btc_contract_list.append({
            "_index": "daily-position",
            "_type": "_doc",
            "_id": "C:" + contract_id + "_D:" + position_date,
            "_source": obj
        })

    json_data = '\n'.join(json.dumps(contract) for contract in btc_contract_list)
    data_post = ndjson.loads(json_data)
    helpers.bulk(es, data_post)
    return btc_contract_list
def is_dnsdbflex(data: str) -> bool:
    """Check if the supplied data conforms to the dnsdbflex output
    (which only contains rrname and rrtype).

    Parameters
    ----------
    data : str
        ndjson data as a string

    Returns
    -------
    True or False

    Raises
    ------
    none
    """
    try:
        j = ndjson.loads(data)
        for line in j:
            if not set(line.keys()) == {'rrname', 'rrtype'}:
                return False  # shortcut: assume it's not dnsdbflex if a single line does not conform
        return True
    except Exception as ex:
        print("oops, this should not have happened. Maybe not an ndjson file? Reason: %s" % (str(ex),),
              file=sys.stderr)
        return False
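# Usage sketch (not part of the original source; assumes ndjson and sys are
# imported alongside is_dnsdbflex above). The two payloads are made up to
# illustrate the expected dnsdbflex shape (rrname/rrtype only).
sample_ok = '{"rrname": "example.com.", "rrtype": "A"}\n{"rrname": "example.org.", "rrtype": "NS"}'
sample_bad = '{"rrname": "example.com.", "rrtype": "A", "rdata": "198.51.100.1"}'
assert is_dnsdbflex(sample_ok)        # every line has exactly rrname and rrtype
assert not is_dnsdbflex(sample_bad)   # extra key -> rejected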
def fetch_kibana_object(obj_type, exportpath):
    try:
        print('# Fetching kibana objects: %s' % obj_type)
        response = requests.post(KIBANA_OBJECTS_EXPORT_URL,
                                 json={'type': obj_type},
                                 verify=False,
                                 auth=(KIBANA_USER, KIBANA_PASS),
                                 headers={'kbn-xsrf': 'true'})
        if response.status_code != 200:
            print('!!! Error fetching kibana object %s: HTTP status code %s' % (obj_type, response.status_code))
        else:
            rawData = response.text.encode('utf-8')
            items = ndjson.loads(rawData)
            if obj_type != 'index-pattern':
                toExport = []
                for ip in items:
                    if 'attributes' in ip.keys() and 'title' in ip['attributes']:
                        if re.match(REDELK_OBJ_FILTER, ip['attributes']['title'], re.IGNORECASE):
                            ip.pop('updated_at', None)
                            ip['version'] = '1'
                            toExport.append(ip)
                export_file = os.path.join(exportpath, '%s%s.ndjson' % (EXPORT_FILES_PREFIX_KIBANA, obj_type))
                print('\tExporting %s: %s' % (obj_type, export_file))
                with open(export_file, 'w') as f:
                    ndjson.dump(toExport, f)
            else:
                for ip in items:
                    if 'attributes' in ip.keys() and 'title' in ip['attributes']:
                        if re.match(INDEX_PATTERNS_FILTER, ip['attributes']['title'], re.IGNORECASE):
                            # print('%s: %s' % (obj_type, ip['attributes']['title']))
                            pn = ip['attributes']['title'][:-2] if ip['attributes']['title'].endswith('-*') else ip['attributes']['title']
                            ip.pop('updated_at', None)
                            ip['version'] = '1'
                            export_file = os.path.join(exportpath, '%s%s_%s.ndjson' % (EXPORT_FILES_PREFIX_KIBANA, obj_type, pn))
                            print('\tExporting %s: %s' % (obj_type, export_file))
                            with open(export_file, 'w') as f:
                                ndjson.dump([ip], f)
    except Exception as e:
        print('!!! Error fetching kibana object %s: %s' % (obj_type, e))
def populate_explanations(self, x_out, ignore_first=False):
    r_index = None
    out_x = None
    xp_map = self.columnInfo.xpMap
    if x_out.status_code == 200:
        out_x = ndjson.loads(x_out.content.decode())
        isFirst = True
        for item in out_x:
            if ignore_first and isFirst:
                isFirst = False
                continue
            oitem = self.outputMap.get(item['time'])
            if oitem is None:
                oput = FalkonryOutput(self.columnInfo)
                oput.set_entity_batch(item)
                oput.time = item['time']
                oput.explanations[xp_map[item['signal']]] = item['score']
                self.outputMap[oput.time] = oput
                self.timeArrowList.append({"ts": oput.time, "complete": 0})
            else:
                oitem.explanations[xp_map[item['signal']]] = item['score']
            r_index = item['index']
    return r_index
def get_posting_list_for_token(token: str) -> dict:
    """ Get posting list of token """
    posting_list: dict = dict()
    for file in file_names:
        if token <= file:
            with open(FINAL_INDEX_PATH + "{}.json".format(file), "r") as file_r:  # , buffering=const_size_in_bytes
                while file_r:  # while not eof
                    line = file_r.readline()
                    if line:
                        term, posting_list_r = ndjson.loads(line)[0]
                        if token == term:  # found the token in this document
                            print("Token '{}' is processing".format(token))
                            return dict(sorted(posting_list_r.items(), key=lambda x: int(x[0])))
                        elif token < term:
                            # optimization: terms are sorted, so once the current term is
                            # already greater than the token, the token cannot appear later
                            print("Word '{}' not found across all documents".format(token))
                            return posting_list
                    else:
                        break  # end of file reached
    return posting_list
def _populate_value(self, c_input, isCondition, ignore_first=False):
    """ Common method to populate condition/prediction label, and confidence score. """
    r_index = None
    out_c = None
    if c_input.status_code == 200:
        out_c = ndjson.loads(c_input.content.decode())
        isFirst = True
        for item in out_c:
            if ignore_first and isFirst:
                isFirst = False
                continue
            oitem = self.outputMap.get(item['time'])
            if oitem is None:
                oitem = FalkonryOutput(self.columnInfo)
                oitem.set_entity_batch(item)
                oitem.time = item['time']
                self.outputMap[oitem.time] = oitem
                self.timeArrowList.append({"ts": oitem.time, "complete": 0})
            if isCondition:
                oitem.condition = item['value']
            else:
                oitem.confidence = item['value']
            r_index = item['index']
    return r_index
def get_last(channel_id):
    read_storage_client = storage.Client()
    bucket_name = 'airqo-bucket'
    filename = 'channel%s.json' % channel_id
    bucket = read_storage_client.get_bucket(bucket_name)
    stats = storage.Blob(bucket=bucket, name=filename).exists(read_storage_client)
    # size = storage.get_blob(bucket=bucket, name=filename).chunksize
    if not stats:
        last_id = 0
        last_time = None
    else:
        blob = bucket.get_blob(filename)
        json_data_string = blob.download_as_string()
        json_data = ndjson.loads(json_data_string)
        json_list = []
        for item in json_data:
            json_list.append(item)
        if len(json_list) != 0:
            last_id = json_list[-1]['entry_id']
            last_time = str_to_date(json_list[-1]['created_at'])
        else:
            last_id = None
            last_time = None
    return last_id, last_time
def parse_and_insert_dnsdbflex(data: str):
    """Parse and validate the simpler dnsdbflex output data.

    Parameters
    ----------
    data : str
        the ndjson data as a string

    Returns
    -------
    A dict with either the error message or the data which may be sent off to the caller of handler()

    Raises
    ------
    none
    """
    objects = []
    try:
        entries = ndjson.loads(data)
        for entry in entries:  # iterate over all ndjson lines
            # validate here (simple validation or full JSON Schema validation)
            if not validate_dnsdbflex(entry):
                return {"error": "Could not validate the dnsdbflex input '%s'" % entry}

            # Next, extract some fields
            rrtype = entry['rrtype'].upper()
            rrname = entry['rrname'].rstrip('.')

            # create a new MISP object, based on the passive-dns object, for each ndjson line
            try:
                o = MISPObject(name='passive-dns', standalone=False, distribution=0,
                               comment='DNSDBFLEX import by cof2misp')
                o.add_attribute('rrtype', value=rrtype, distribution=0, comment='DNSDBFLEX import by cof2misp')
                o.add_attribute('rrname', value=rrname, distribution=0, comment='DNSDBFLEX import by cof2misp')
            except Exception as ex:
                print("could not create object. Reason: %s" % str(ex))

            #
            # add dnsdbflex entry to MISP object
            #
            objects.append(o.to_json())

        r = {'results': {'Object': [json.loads(o) for o in objects]}}
    except Exception as ex:
        misperrors["error"] = "An error occurred during parsing of input: '%s'" % (str(ex),)
        return misperrors
    return r
def VTH_SetSecPanel(self, CVQ6081_Alarm):
    if CVQ6081_Alarm == 0:
        self.AlarmEnable = False
    else:
        self.AlarmEnable = True
        self.AlarmProfile = AlarmProfile[CVQ6081_Alarm]

    query_args = {
        "method": "configManager.setConfig",
        "magic": "0x1234",
        "params": {
            "table": {
                "AlarmEnable": self.AlarmEnable,
                "CurrentProfile": self.AlarmProfile,
                "ProfileEnable": True,
                "Profiles": self.AlarmConfig
            },
            "name": "CommGlobal",
        },
        "session": self.SessionID,
        "id": self.ID
    }

    if verbose:
        log.info("[" + str(datetime.datetime.now()) + " VTH_BOX] Updating to: {}".format(query_args))
    if verbose:
        log.info("VTH_SetSecPanel Service Call request: {}".format(json.dumps(query_args)))

    data = self.P2P(json.dumps(query_args))

    if data is None:
        log.failure("[" + str(datetime.datetime.now()) + " VTH_BOX-P2P_FAILURE] SetSecPanel Failed - No answer")
        self.VTH_ON_LINE = self.P2P_traceError()  # KO
        return False
    elif len(data) == 1:
        if verbose:
            log.info("P2P-1. VTH_SetSecPanel Service Call answer: {}".format(data))
        data = json.loads(data)
        if data.get('result'):
            self.VTH_ON_LINE = self.P2P_traceError()  # KO
            return False
        else:
            self.VTH_ON_LINE = self.P2P_traceError()  # KO
            return False
    else:
        if verbose:
            log.info("P2P-2. VTH_SetSecPanel Service Call answer: {}".format(data))
        data = ndjson.loads(data)
        if data[0].get('method') == "client.notifyConfigChange":
            self.AlarmEnable = data[0]['params']['table']['AlarmEnable']
            self.AlarmProfile = data[0]['params']['table']['CurrentProfile']
            self.AlarmConfig = data[0]['params']['table']['Profiles']
            if verbose:
                log.info("[" + str(datetime.datetime.now()) + " VTH_BOX-AlarmEnable] been changed remotely to: {}".format(self.AlarmEnable))
            if verbose:
                log.info("[" + str(datetime.datetime.now()) + " VTH_BOX-AlarmProfile] is: {}".format(self.AlarmProfile))
            if verbose:
                log.info("[" + str(datetime.datetime.now()) + " VTH_BOX-AlarmConfiguration] is: {}".format(self.AlarmConfig))
            if not self.AlarmEnable:
                AlarmToken['nvalue'] = 0
            else:
                AlarmToken['nvalue'] = VTHAlarmProfile[self.AlarmProfile]

    self.VTH_ON_LINE = 0  # OK
    return True
def load_data():
    data = ndjson.loads(req_data["data"])
    for event in data:
        if isinstance(event, list):
            for e in event:
                yield e
        else:
            yield event
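# Usage sketch (not part of the original source). req_data is assumed to be a
# dict visible to load_data() whose "data" entry holds an ndjson payload; note
# how a line containing a JSON array is flattened into individual events.
req_data = {"data": '{"a": 1}\n[{"b": 2}, {"c": 3}]'}
print(list(load_data()))  # [{'a': 1}, {'b': 2}, {'c': 3}]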
def get(self):
    covid_19 = Covid_19(storage.Client())
    covid_19_blob = covid_19.get_json()
    if covid_19_blob is not None and not covid_19_blob.exists():
        abort(400, "No such data found")
    elif covid_19_blob is None:
        return "Error fetching file"
    return ndjson.loads(covid_19_blob.download_as_string())
def _deserialize_ndjson_string(byte_string) -> List[object]:
    """
    Deserialize the contents of a newline-delimited JSON string to a list

    Args:
        byte_string: The NDJSON contents to be deserialized

    Returns:
        list: Each individual JSON entry deserialized as Python objects
    """
    utf8_string = str(byte_string, 'utf-8')
    content = ndjson.loads(utf8_string)
    return content
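# Usage sketch (not part of the original source; assumes ndjson and List are
# imported alongside _deserialize_ndjson_string above).
raw = b'{"id": 1}\n{"id": 2}\n'
print(_deserialize_ndjson_string(raw))  # [{'id': 1}, {'id': 2}]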
async def stream_game_state(self, game):
    url = 'https://lichess.org/api/bot/game/stream/' + game
    async with self.session.get(url) as response:
        chunk = await response.content.read(0x100000)
        print(chunk)
        try:
            chunk = ndjson.loads(chunk)
            print(chunk)
        except Exception as e:
            print('Error parsing game stream:', e)
            exit()
        return chunk
def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]:
    """
    Fetches the remote ndjson file and caches the results.

    Args:
        url (str): Can be any url pointing to an ndjson file.

    Returns:
        ndjson as a list of dicts.
    """
    response = requests.get(url)
    response.raise_for_status()
    return ndjson.loads(response.text)
def main():
    # Read the data from Google Cloud Storage
    read_storage_client = storage.Client()

    # Set buckets and filenames
    bucket_name = "all_alerts_7_21"  # I'm using project: cops-cloudmonus-nonprod-563b in mckesson's gcp
    filename = "sample_json.json"

    # get bucket with name
    bucket = read_storage_client.get_bucket(bucket_name)

    # get bucket data as blob
    blob = bucket.get_blob(filename)

    # convert to string
    json_data_string = blob.download_as_string()
    # print(json_data_string)

    json_data = ndjson.loads(json_data_string)
    # print(json_data)
    # json_data = json.loads(json_data_string)

    items = []
    for item in json_data:
        items.append(item)
        # print(item)
        # print(item['Website'])

    list1 = items[0:len(items)]
    print(list1)

    # removing something
    list_less = []
    for item in list1:
        if item["Website"] != "Yandex":
            list_less.append(item)

    result = ""
    for item in list_less:
        item2 = json.dumps(item)
        result = result + str(item2) + "\n"

    # adding something else
    # item = {"Website": "Yandex", "URL": "Yandex.com", "ID": 4}
    # item2 = json.dumps(item)
    # result = result + str(item2) + "\n"

    print(result)
    # result_json = json.dumps(result)
    # print(result_json)

    # Write the data to Google Cloud Storage
def write_data(data, topicname):
    lines = ndjson.loads(data)
    for eachline in lines:
        msg = str(eachline)
        futures.update({msg: None})
        # When you publish a message, the client returns a future.
        future = publisher.publish(
            topicname,
            msg.encode("utf-8")  # data must be a bytestring.
        )
        futures[msg] = future
        # Publish failures shall be handled in the callback function.
        future.add_done_callback(get_callback(future, msg))
def test_delete_log():
    resp1 = requests.post("http://txlogging:8080/log", json=msg)
    assert resp1.status_code == 200

    time.sleep(10)

    resp2 = requests.get("http://txlogging:8080/log", params={})
    assert resp2.status_code == 200
    assert len(ndjson.loads(resp2.text)) == 1

    resp3 = requests.delete("http://txlogging:8080/log")
    assert resp3.status_code == 200

    resp4 = requests.get("http://txlogging:8080/log", params={})
    assert resp4.status_code == 200
    assert len(ndjson.loads(resp4.text)) == 0

    requests.delete("http://txlogging:8080/log")
def crawler(self, data: str):
    data = ndjson.loads(data)
    self.json_schema = self.__get_json_schema(data)
    columns = []
    for column_name, column_prop in self.json_schema['items']['properties'].items():
        column = {
            'name': column_name,
            'type': self.__get_type(column_prop),
            'nullable': self.__is_nullable(column_prop),
            'default': None
        }
        columns.append(column)
    return columns
def test_logging():
    resp1 = requests.post("http://txlogging:8080/log", json=msg)
    assert resp1.status_code == 200

    time.sleep(10)

    resp2 = requests.get("http://txlogging:8080/log", params={})
    assert resp2.status_code == 200
    assert len(ndjson.loads(resp2.text)) == 1

    start = "2001-01-01T00:00:00-01:00"
    end = "2002-01-01T00:00:00-01:00"
    resp3 = requests.get("http://txlogging:8080/log", params={
        "start": start,
        "end": end
    })
    assert resp3.status_code == 200
    assert len(ndjson.loads(resp3.text)) == 1

    start = "2002-01-01T00:00:00-01:00"
    end = "2003-01-01T00:00:00-01:00"
    resp4 = requests.get("http://txlogging:8080/log", params={
        "start": start,
        "end": end
    })
    assert resp4.status_code == 200
    assert len(ndjson.loads(resp4.text)) == 0

    requests.delete("http://txlogging:8080/log")
def get_cloudtrail_file(key_prefix):
    s3 = boto3.resource('s3')
    b = s3.Bucket(settings["logs_bucket"])
    result = []
    for obj in b.objects.filter(Prefix=key_prefix):
        print(obj.key)
        s3_object = s3.Object(settings["logs_bucket"], obj.key).get()
        object_content = s3_object['Body'].read()
        results = gzip.decompress(object_content)
        for i in ndjson.loads(results)[0]['Records']:
            i['event_type'] = i['eventType']
            i['event_time'] = int(get_time(i).timestamp())
            result.append(i)
    return result
def open_and_read(file_path):
    # file_path is the path of the file as a string.
    object_list = []
    with open(file_path) as f:
        for line in f:
            if line != "\n":
                stroke_dict_list = ndjson.loads(line)
                stroke_dict = stroke_dict_list[0]
                # now I have a dictionary named stroke_dict.
                my_stroke_list = stroke_dict.get("drawing")
                my_stroke_key_ids = stroke_dict.get("key_id")
                object_list.append([my_stroke_list, my_stroke_key_ids])
    return object_list
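# Usage sketch (not part of the original source). 'cat.ndjson' is a hypothetical
# Quick, Draw!-style file in which every line carries "drawing" and "key_id".
for strokes, key_id in open_and_read('cat.ndjson')[:3]:
    print(key_id, len(strokes) if strokes else 0, 'strokes')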
def decode_api_response(url):
    '''Return JSON from url response.'''
    response = requests.get(url)
    status = response.status_code
    try:
        response = response.json()
    except Exception:
        try:
            response = ndjson.loads(response.text)
        except Exception:
            print("Bad response")
            response = None
    return (status, response)
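# Usage sketch (not part of the original source); the URL is illustrative only,
# and requests/ndjson are assumed to be imported alongside decode_api_response.
status, payload = decode_api_response('https://example.com/api/data.ndjson')
if status == 200 and payload is not None:
    print(status, type(payload))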
async def stream(self):
    url = 'https://lichess.org/api/stream/event'
    while 1:
        async with self.session.get(url) as response:
            chunk = await response.content.read(0x100000)
            try:
                chunk = ndjson.loads(chunk)
                print(chunk)
            except Exception as e:
                print('ERROR:', e)
                time.sleep(2)
                continue
            if len(chunk) > 0:
                return chunk
            print('waiting for something to happen')
            time.sleep(4)
def remove_reference(text):
    new_text = []
    new_references = []
    try:
        text = json.loads(text)
        for ref in text['references']:
            if not ref['id'].startswith('AX') and len(ref['id']) != 20:
                new_references.append(remove_obj_keys(ref))
        text['references'] = new_references
    except json.decoder.JSONDecodeError:
        text = ndjson.loads(text)
        for ref in text:
            if (not ref['references'][0]['id'].startswith('AX')
                    and len(ref['references'][0]['id']) != 20):
                new_text.append(remove_obj_keys(ref))
    return json.dumps(new_text) if new_text else json.dumps(text)
def process_company_json(record: dict, bucket):
    """
    Fetches the raw file, preprocesses it and returns the processed articles.

    ## args
    * record: a remote raw json file storage record (source, source_file, entity_id, scenario_id)
    * bucket: Google Bucket instance

    ## returns
    processed articles based on the MongoDB Article model
    """
    blob = bucket.blob(record["source_file"])

    # convert to string
    json_data_string = blob.download_as_string()

    # returns a list
    data = ndjson.loads(json_data_string)
    logging.info("data storage length: {}".format(len(data)))

    processor = getattr(source_processor, record["source"])
    logging.info("processing: {}".format(record["source_file"]))
    processed_records = processor(data[0], record["entity_id"],
                                  record["scenario_id"], record["source_file"])

    # delete duplicates here
    df = pd.DataFrame(processed_records, columns=[
        "uuid", "entityID_id", "scenarioID_id", "title", "unique_hash",
        "url", "search_keyword", "published_date", "internal_source",
        "domain", "language", "source_country", "raw_file_source",
        "entry_created"
    ])
    before = df.shape
    df.drop_duplicates(subset='url', keep="first", inplace=True)
    after = df.shape
    logging.info("Before: {}, After: {}".format(before, after))

    processed_records = df.values.tolist()
    return processed_records
def raise_response_error(r):
    if r.status_code >= 400:
        print("==== Response Debugging ====")
        print("##Request Headers", r.request.headers)

        # extract content type
        ct = r.headers["content-type"].split(";")[0]
        if ct == ContentType.JSON.value:
            dump = dump_response(r)
            print(dump)
            print("##Response:", dump.decode("UTF-8"))
            err = dacite.from_dict(data_class=Error, data=r.json())
            print(err)
        elif ct == ContentType.NDJSON.value:
            decoded = ndjson.loads(r.text)
            print("##Response:", decoded)

    r.raise_for_status()
def handler(event, context):
    s3 = boto3.resource('s3')
    obj = s3.Object("abalustre-btc-contracts",
                    event["Records"][0]['s3']['object']['key'])
    body = obj.get()["Body"].read()
    # print(body[1])
    lines = body.splitlines()
    # print(str(lines[1], 'ISO-8859-1'))
    # print(str(lines[10], 'utf-8'))
    # line10 = str(lines[10], 'utf-8')
    # print(line10)
    # utf_lines = []
    # for i in range(len(lines)):
    #     utf_lines.append(str(lines[i], 'utf-8'))
    # print(utf_lines)

    es = Elasticsearch(cloud_id=os.environ["ELASTICID"],
                       http_auth=(os.environ["ELASTICUSER"], os.environ["ELASTICPASSWORD"]))

    tg_list = target_and_group_list()
    g_list = segmenta_imbarq(lines=lines, target_group_list=tg_list)
    btc_bulk = g_list[6]
    btc_obj_list = build_btc_contract_info(btc_bulk)

    btc_contract_list = []
    for i in range(len(btc_obj_list)):
        obj = btc_obj_list[i]
        print(type(obj))
        print(obj)
        contract_id = obj['contract_number']
        position_date = obj['position_date']
        btc_contract_list.append({
            "_index": "btc-contracts",
            "_type": "_doc",
            "_id": "C:" + contract_id + "_D:" + position_date,
            "_source": obj
        })

    json_data = '\n'.join(json.dumps(contract) for contract in btc_contract_list)
    data_post = ndjson.loads(json_data)
    helpers.bulk(es, data_post)
    return btc_contract_list
def create_from_url(cls, client, project_id: str, name: str, url: str,
                    validate=True) -> 'BulkImportRequest':
    """
    Creates a BulkImportRequest from a publicly accessible URL
    to an ndjson file with predictions.

    Args:
        client (Client): a Labelbox client
        project_id (str): id of project for which predictions will be imported
        name (str): name of BulkImportRequest
        url (str): publicly accessible URL pointing to ndjson file containing predictions
        validate (bool): a flag indicating if there should be a validation
            if `url` is valid ndjson
    Returns:
        BulkImportRequest object
    """
    if validate:
        logger.warn(
            "Validation is turned on. The file will be downloaded locally and processed before uploading."
        )
        res = requests.get(url)
        data = ndjson.loads(res.text)
        _validate_ndjson(data, client.get_project(project_id))

    query_str = """mutation createBulkImportRequestPyApi(
            $projectId: ID!, $name: String!, $fileUrl: String!) {
        createBulkImportRequest(data: {
            projectId: $projectId,
            name: $name,
            fileUrl: $fileUrl
        }) {
            %s
        }
    }
    """ % query.results_query_part(cls)
    params = {"projectId": project_id, "name": name, "fileUrl": url}
    bulk_import_request_response = client.execute(query_str, params=params)
    return cls(client, bulk_import_request_response["createBulkImportRequest"])
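# Usage sketch (not part of the original source). In the Labelbox SDK this helper
# is exposed as a classmethod of BulkImportRequest; the client construction, IDs
# and URL below are placeholders/assumptions, not values from the original.
import labelbox

lb_client = labelbox.Client(api_key="<LABELBOX_API_KEY>")
bulk_request = BulkImportRequest.create_from_url(
    lb_client,
    project_id="<project-id>",
    name="my-import",
    url="https://example.com/predictions.ndjson",
    validate=True)
print(bulk_request)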