def get_processed(a1=18, a2=24, p1=0, p2=8, l=10000,
                  g='top-1,top-10%25,top-15%25,theory'):
    # processor is imported in functions to avoid deadlock when running
    # test_process in processor.py since that imports this module.
    import processor
    if not os.path.exists('cached_data'):
        os.makedirs('cached_data')
    processed = {}
    a1 = int(a1)
    a2 = int(a2)
    p1 = int(p1)
    p2 = int(p2)
    l = int(l)
    g = urllib.unquote(g).decode('utf8')
    goals = g.split(',')
    for goal in goals:
        filename = "cached_data/a1%ia2%ip1%ip2%il%i-%s.json" % (a1, a2, p1, p2, l, goal)
        processed_goal = []
        if os.path.isfile(filename):
            with open(filename) as fhandler:
                processed_goal = ujson.load(fhandler)
        else:
            compatibilities = get_compatibilities(a1, a2, p1, p2, l)
            processed_goal = list(processor.process(compatibilities, lifetimes=l, goal=goal))
            with open(filename, 'w') as fhandler:
                ujson.dump(processed_goal, fhandler)
        processed[goal] = processed_goal
    return processed
def main(args):
    """
    Main method
    Rolling like it's 2006
    """
    conn = boto.connect_s3(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key)
    bucket = conn.get_bucket("tweettrack")
    if len(sys.argv) == 4:
        followertable = read_followertable(args[1], bucket)
        assert followertable is not None
        print "followertable is this long: %d, and we're saving it" % (len(followertable),)
        with open("followertable.json", "w") as followertable_file:
            ujson.dump(followertable, followertable_file)
    else:
        print "followerstable..."
        with open(sys.argv[4], "r") as followertable_file:
            followertable = ujson.load(followertable_file)
        print "followerstable done..."
        #print "gammas..."
        #with open(sys.argv[5], "r") as gamma_file:
        #    gammas = ujson.load(gamma_file)
        #    gc.collect()
        #print "gammas done..."
    gammas = get_gammas(args[2], bucket)
    #with open("gammas.json", "w") as gamma_file:
    #    ujson.dump(gammas, gamma_file)
    do_join(args[3], followertable, gammas, bucket)
    conn.close()
def get_compatibilities(a1=18, a2=24, p1=0, p2=8, l=10000):
    compatibilities = []
    a1 = int(a1)
    a2 = int(a2)
    p1 = int(p1)
    p2 = int(p2)
    l = int(l)
    filename = "cached_data/a1%ia2%ip1%ip2%il%i.json" % (a1, a2, p1, p2, l)
    if not os.path.exists('cached_data'):
        os.makedirs('cached_data')
    if os.path.isfile(filename):
        with open(filename) as fhandler:
            compatibilities = ujson.load(fhandler)
    else:
        for lt in range(1, l+1):
            # Number of candidates met per year should range between p1 and p2.
            yearly_num_candidates = []
            for a in range(0, (a2-a1)):
                yearly_num_candidates.append(random.choice(range(p1, p2)))
            for year, num_candidates in enumerate(yearly_num_candidates):
                # Compatibility scores of candidates should follow a normal distribution.
                scores = np.random.normal(size=num_candidates)
                for score in scores:
                    compatibilities.append({
                        'lifetime': lt,
                        'candidate_score': round(score, 3),
                        'candidate_age_met': a1+year
                    })
        with open(filename, 'w') as fhandler:
            ujson.dump(compatibilities, fhandler)
    return compatibilities
def process(self, id: int):
    """Increment offsets from a volume.
    """
    text = Text.query.get(id)
    tokens = text.tokens()

    # Assemble token list.
    rows = [
        dict(
            text_id=id,
            ratio=i/len(tokens),
            offset=i,
            **token._asdict()
        )
        for i, token in enumerate(tokens)
    ]

    # Flush to disk.
    path = os.path.join(self.result_dir, str(uuid.uuid4()))
    with open_makedirs(path, 'w') as fh:
        ujson.dump(rows, fh)
def add_to_resources(movie):
    if type(movie) is not dict:
        return "Movie needs to be specified as key:value pairs in a dictionary. Process Aborted."
    if 'alias' not in movie.keys():
        return "Update has no 'alias' key. Process Aborted."
    if 'tag' not in movie.keys():
        return "Update has no 'tag' key. Process Aborted."
    if 'title' not in movie.keys():
        return "Update has no 'title' key. Process Aborted."
    if 'resources.json' not in os.listdir('.'):
        return "The file 'resources.json' is not in the current working directory. Process Aborted."

    with open('resources.json') as json_file:
        resource = ujson.load(json_file)

    if is_in_resources(resource, movie['alias']) == True:
        return "%s with alias '%s' and tag '%s' is already added. Need to update? Use the update function." % (movie['title'], movie['alias'], movie['tag'])
    else:
        movie['timestamp'] = datetime.datetime.now()
        resource['movies'].append(movie)
        resource['logs'].append({
            'timestamp': datetime.datetime.now(),
            'type': 'post',
            'message': "'%s' with alias '%s' and tag '%s' was successfully added." % (movie['title'], movie['alias'], movie['tag'])
        })
        with open('resources.json', 'w') as outfile:
            ujson.dump(resource, outfile)
        return "%s with alias '%s' and tag '%s' was successfully added." % (movie['title'], movie['alias'], movie['tag'])
def create(self, name=None, time=None, uid=None, container=None, **kwargs):
    """Create a sample locally

    Parameters
    ----------
    name: str
        Name of the sample
    time: float
        Timestamp generated by the client
    uid: str
        Unique identifier for this sample
    container: str, doct.Document
        The container/group sample is contained within

    Returns
    -------
    payload: dict
        Document dict that was inserted
    """
    # TODO: Allow container to be an object
    if container:
        container = doc_or_uid_to_uid(container)
    payload = dict(uid=uid if uid else str(uuid4()),
                   name=name,
                   time=time if time else ttime.time(),
                   container=container if container else 'NULL',
                   **kwargs)
    self.sample_list.append(payload)
    with open(self._samp_fname, 'w+') as fp:
        ujson.dump(self.sample_list, fp)
    return payload
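For context, a hedged sketch of how the sample list persisted above can be read back with ujson; the samples.json filename stands in for whatever self._samp_fname points at and is only an assumption for illustration.

import ujson

# 'samples.json' is an assumed stand-in for the path stored in self._samp_fname.
with open('samples.json') as fp:
    sample_list = ujson.load(fp)

# Each entry is a payload dict returned by create(), so fields like 'uid',
# 'name', 'time' and 'container' are available directly.
latest = sample_list[-1]
print(latest['uid'], latest['name'])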
def semantic_labeling(train_dataset, test_dataset, train_dataset2=None,
                      evaluate_train_set=False, reuse_rf_model=True):
    """Do semantic labeling: train on train_dataset and test on test_dataset.

    train_dataset2 is optionally provided in case train_dataset and test_dataset don't have
    overlapping semantic types. For example, if train_dataset covers soccer domains and
    test_dataset covers weather domains, the system can't recognize the semantic types of
    test_dataset because there is no overlap. In that case we provide another train_dataset2,
    which has semantic types of weather domains, so that the system can make predictions.
    train_dataset2 defaults to train_dataset. (train_dataset is used to train the RandomForest.)

    :param train_dataset: str
    :param test_dataset: str
    :param train_dataset2: Optional[str]
    :param evaluate_train_set: bool
    :param reuse_rf_model: bool
    :return:
    """
    logger = get_logger("semantic-labeling-api",
                        format_str='>>>>>> %(asctime)s - %(levelname)s:%(name)s:%(module)s:%(lineno)d: %(message)s')

    if train_dataset2 is None:
        train_dataset2 = train_dataset
        datasets = [train_dataset, test_dataset]
    else:
        datasets = [train_dataset, test_dataset, train_dataset2]

    semantic_labeler = SemanticLabeler()
    # read data into memory
    logger.info("Read data into memory")
    semantic_labeler.read_data_sources(list(set(datasets)))
    # index datasets that haven't been indexed before
    not_indexed_datasets = list({dataset for dataset in datasets if not is_indexed(dataset)})
    if len(not_indexed_datasets) > 0:
        logger.info("Index not-indexed datasets: %s" % ",".join(not_indexed_datasets))
        semantic_labeler.train_semantic_types(not_indexed_datasets)

    # remove existing file if not reusing the previous random forest model
    if not reuse_rf_model and os.path.exists("model/lr.pkl"):
        os.remove("model/lr.pkl")

    # train the model
    logger.info("Train randomforest... with args ([1], [%s])", train_dataset)
    semantic_labeler.train_random_forest([1], [train_dataset])

    # generate semantic typing
    logger.info("Generate semantic typing using: trainset: %s, for testset: %s", train_dataset, test_dataset)
    result = semantic_labeler.test_semantic_types_from_2_sets(train_dataset2, test_dataset)

    if not os.path.exists("output"):
        os.mkdir("output")

    with open("output/%s_result.json" % test_dataset, "w") as f:
        ujson.dump(result, f)

    if evaluate_train_set:
        logger.info("Generate semantic typing for trainset")
        result = semantic_labeler.test_semantic_types_from_2_sets(train_dataset2, train_dataset2)
        with open("output/%s_result.json" % train_dataset2, "w") as f:
            ujson.dump(result, f)

    return result
def test_dumpFileArgsError(self):
    try:
        ujson.dump([], '')
    except TypeError:
        pass
    else:
        assert False, 'expected TypeError'
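The test above checks that ujson.dump rejects a second argument that is not file-like. For comparison, a minimal sketch of the accepted calling convention, using a tiny stand-in object with a write() method (the Sink class is illustrative, not part of the test suite):

import ujson

class Sink(object):
    """Minimal file-like object: ujson.dump only needs a write() method."""
    def __init__(self):
        self.chunks = []
    def write(self, data):
        self.chunks.append(data)

sink = Sink()
ujson.dump([], sink)
assert "".join(sink.chunks) == "[]"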
def create(self, uid=None, time=None, container=None, **kwargs):
    """Create a container locally.

    Parameters
    ----------
    time: float
        Timestamp generated by the client
    uid: str
        Unique identifier for this sample
    container: str, doct.Document, optional
        Container this container is contained within

    Returns
    -------
    payload: dict
        Document dict that was inserted
    """
    if container:
        container = doc_or_uid_to_uid(container)
    payload = dict(uid=uid if uid else str(uuid4()),
                   container=container if container else 'NULL',
                   time=time if time else ttime.time(),
                   **kwargs)
    self.container_list.append(payload)
    with open(self._cont_fname, 'w+') as fp:
        ujson.dump(self.container_list, fp)
    return payload
def saveTweets(self):
    meaningful = self.jsonAccepted*self.cfg['KeepAccepted'] + \
        self.jsonPartial*self.cfg['KeepPartial'] + \
        self.jsonExcluded*self.cfg['KeepExcluded']
    if len(meaningful) > 1:
        print "\nDumping tweets to file, contains %s tweets with %s accepted, %s rejected, %s partial matches, and %s irrelevant" % (
            len(meaningful), self.acceptedCount, self.excludedCount, self.partialCount, self.irrelevantCount)
        if self.cfg['TweetData'] != 'all':
            meaningful = cleanJson(meaningful, self.cfg, self.tweetTypes)
        #timeStamp = datetime.date.today().strftime("%A")
        timeStamp = self.startTime
        self.lastWrite = self.startDay
        if self.cfg['KeepRaw']:
            with open(self.pathOut+'Raw_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
                json.dump(self.jsonRaw, outFile)
                outFile.close()
        with open(self.pathOut+'FilteredTweets_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
            json.dump(meaningful, outFile)
            outFile.close()
        print 'Json text dump complete, buffering....'
        time.sleep(1)
        giSeeker.flushTweets(self)
    else:
        print "No tweets found for date"
    print "Updating geoPickle"
    self.geoCache = updateGeoPickle(self.geoCache, getPickleName(self.cfg), self.cfg)
def convert_to_json(lang_url, lang_code):
    """A handy json converter: just pass the lang_code and the url of the json source."""
    data = requests.get(lang_url)
    node_data = ujson.loads(data.content)
    dump_json = os.path.join(BUILD_PATH, "%s_node_data.json" % lang_code)
    with open(dump_json, "w") as f:
        ujson.dump(node_data, f)
def saveTweets(self):
    print "\nDumping tweets to file, contains %s tweets with %s accepted, %s rejected, %s partial matches, and %s irrelevant" % (
        self.cfg['StopCount'], self.acceptedCount, self.excludedCount, self.partialCount, self.irrelevantCount)
    print '\tJson text dump complete....\n'
    meaningful = self.jsonAccepted*self.cfg['KeepAccepted'] + \
        self.jsonPartial*self.cfg['KeepPartial'] + \
        self.jsonExcluded*self.cfg['KeepExcluded']
    if self.cfg['TweetData'] != 'all':
        meaningful = cleanJson(meaningful, self.cfg, self.tweetTypes)
    timeStamp = self.startTime
    if self.cfg['KeepRaw']:
        with open(self.pathOut+'Raw_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
            json.dump(self.jsonRaw, outFile)
            outFile.close()
    with open(self.pathOut+'FilteredTweets_'+self.cfg['FileName']+'_'+timeStamp+'.json', 'w') as outFile:
        json.dump(meaningful, outFile)
        outFile.close()
    giListener.flushTweets(self)
    print "Updating geoPickle"
    self.geoCache = updateGeoPickle(self.geoCache, self.cfg['Directory']+'caches/'+pickleName)
def store_update(self, db_name, db_desc):
    """Updates the database store file db_name key, with db_desc value"""
    store_datas = self.extract_store_datas()
    store_datas.update({db_name: db_desc})
    json.dump(store_datas, open(self.store_file, 'w'))
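As written above, the file handle from open() is left for the garbage collector to close. A minimal equivalent under the same method and attribute names, written with a context manager so the handle is flushed and closed deterministically (a sketch, not the project's actual code):

def store_update(self, db_name, db_desc):
    """Updates the database store file db_name key, with db_desc value."""
    store_datas = self.extract_store_datas()
    store_datas.update({db_name: db_desc})
    with open(self.store_file, 'w') as fh:  # closed even if json.dump raises
        json.dump(store_datas, fh)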
def saveDatabase(self):
    self.proxySend("Creating dict from room objects.")
    db = {}
    for vnum, roomObj in iterItems(self.rooms):
        newRoom = {}
        newRoom["name"] = roomObj.name
        newRoom["desc"] = roomObj.desc
        newRoom["dynamicDesc"] = roomObj.dynamicDesc
        newRoom["note"] = roomObj.note
        newRoom["terrain"] = roomObj.terrain
        newRoom["light"] = roomObj.light
        newRoom["align"] = roomObj.align
        newRoom["portable"] = roomObj.portable
        newRoom["ridable"] = roomObj.ridable
        newRoom["mobFlags"] = list(roomObj.mobFlags)
        newRoom["loadFlags"] = list(roomObj.loadFlags)
        newRoom["x"] = roomObj.x
        newRoom["y"] = roomObj.y
        newRoom["z"] = roomObj.z
        newRoom["exits"] = {}
        for direction, exitObj in iterItems(roomObj.exits):
            newExit = {}
            newExit["exitFlags"] = list(exitObj.exitFlags)
            newExit["doorFlags"] = list(exitObj.doorFlags)
            newExit["door"] = exitObj.door
            newExit["to"] = exitObj.to
            newRoom["exits"][direction] = newExit
        db[vnum] = newRoom
    self.proxySend("Saving the database in JSon format.")
    with codecs.open(MAP_FILE, "wb", encoding="utf-8") as fileObj:
        json.dump(db, fileObj)
    self.proxySend("Map Database saved.")
def process(self, inputs):
    try:
        for x in inputs:
            #self.log(x)
            prov = inputs[x]
            #if isinstance(prov, list) and "data" in prov[0]:
            #    prov = prov[0]["data"]
            #el
            if "_d4p" in prov:
                prov = prov["_d4p"]
            elif "provenance" in prov:
                prov = prov["provenance"]
            # Write each provenance record to its own bulk file.
            filep = open(os.environ['PROV_PATH'] + "/bulk_" + getUniqueId(), "w")
            ujson.dump(prov, filep)
            filep.close()
    except:
        self.log(traceback.format_exc())
def process(self, inputs):
    try:
        out = None
        for x in inputs:
            prov = inputs[x]
            if isinstance(prov, list) and "data" in prov[0]:
                prov = prov[0]["data"]
            elif "_d4p" in prov:
                prov = prov["_d4p"]
            self.bulk.append(prov)
            #self.log(os.environ['PBS_NODEFILE'])
            #self.log(socket.gethostname())
            # Flush the accumulated records in batches of 100.
            if len(self.bulk) == 100:
                filep = open(os.environ['PROV_PATH'] + "/bulk_" + getUniqueId(), "w")
                ujson.dump(self.bulk, filep)
                # filep.close()
                self.bulk[:] = []
                # for x in self.bulk:
                #     del x
    except:
        self.log(traceback.format_exc())
def savemsgstore():
    try:
        f = open("generalmessage.json", "w")
        ujson.dump(generalmessagestore, f)
        f.close()
    except:
        pass
def export_uploads_local_helper(realm, output_dir, local_dir):
    # type: (Realm, Path, Path) -> None
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    count = 0
    records = []
    for attachment in Attachment.objects.filter(realm_id=realm.id):
        local_path = os.path.join(local_dir, attachment.path_id)
        output_path = os.path.join(output_dir, attachment.path_id)
        mkdir_p(os.path.dirname(output_path))
        subprocess.check_call(["cp", "-a", local_path, output_path])
        stat = os.stat(local_path)
        record = dict(realm_id=attachment.realm.id,
                      user_profile_id=attachment.owner.id,
                      user_profile_email=attachment.owner.email,
                      s3_path=attachment.path_id,
                      path=attachment.path_id,
                      size=stat.st_size,
                      last_modified=stat.st_mtime,
                      content_type=None)
        records.append(record)
        count += 1
        if (count % 100 == 0):
            logging.info("Finished %s" % (count,))
    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4)
def run(self):
    names = collections.defaultdict(set)
    url = "http://www.jstor.org/kbart/collections/all-archive-titles"
    output = shellout("""curl -sL "{url}" > {output} """, url=url)

    with luigi.LocalTarget(output, format=TSV).open() as handle:
        for row in handle.iter_tsv():
            if len(row) < 27:
                self.logger.warn("short KBART row, skipping: %s", row)
                continue
            issns = row[1:3]
            parts = [p.strip() for p in row[26].split(";")]
            for issn in [v.strip() for v in issns]:
                if not issn:
                    continue
                for name in parts:
                    if not name:
                        continue
                    names[issn].add(name)

    with self.output().open('w') as output:
        import json  # ujson does not support the cls keyword
        json.dump(names, output, cls=SetEncoder)
def run_experiment():
    http_client = AsyncHTTPClient()
    num_files = len(os.listdir("./urls"))
    for i, url_file in enumerate(os.listdir("./urls")):
        if not url_file.endswith(".json"):
            print "Skipping: ", url_file
            continue
        urls = json.load(open("./urls/" + url_file))
        filtered_urls = filter(data_not_exists, urls)
        random.shuffle(filtered_urls)
        p = PB.ProgressBar(maxval=len(filtered_urls)//10 + 1,
                           widgets=("{} / {}".format(i, num_files), PB.Bar(), PB.ETA())).start()
        for urls_chunk in p(chunk_seq(filtered_urls, 10)):
            try:
                responses = yield [http_client.fetch(url['url']) for url in urls_chunk]
            except:
                print "Failed for some result in: ", urls_chunk
                continue
            for raw, response in izip(urls_chunk, responses):
                url = raw['url']
                data = {"url": url, "body": response.body, "desc": raw['desc']}
                fname = url_to_filename(raw)
                try:
                    os.makedirs(os.path.dirname(fname))
                except OSError:
                    pass
                json.dump(data, open(fname, "w+"))
            time.sleep(.5)
def write(manifest, manifest_path):
    dir_name = os.path.dirname(manifest_path)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    with open(manifest_path, "wb") as f:
        json.dump(manifest.to_json(), f, sort_keys=True, indent=1)
        f.write("\n")
def dump_event_queues():
    start = time.time()

    with open(settings.JSON_PERSISTENT_QUEUE_FILENAME, "w") as stored_queues:
        ujson.dump([(qid, client.to_dict()) for (qid, client) in six.iteritems(clients)],
                   stored_queues)

    logging.info("Tornado dumped %d event queues in %.3fs"
                 % (len(clients), time.time() - start))
def setCookies():
    wte = raw_input("Please input your webTradeEligibility cookie: \n")
    sessionid = raw_input("Please input your sessionid cookie: \n")
    steamLogin = raw_input("Please input your steamLogin cookie: \n")
    steamLoginSecure = raw_input("Please input your steamLoginSecure cookie: \n")
    sma = raw_input("Please input your steamMachineAuth cookie (name+value together): \n")
    steamRememberLogin = raw_input("Please input your steamRememberLogin cookie: \n")
    cookies_json['webTradeEligibility'] = wte
    cookies_json['sessionid'] = sessionid
    cookies_json['steamLogin'] = steamLogin
    cookies_json['steamLoginSecure'] = steamLoginSecure
    cookies_json['steamMachineAuth'] = sma
    cookies_json['steamRememberLogin'] = steamRememberLogin
    try:
        cookies_json_file = open('util/cookies.json', 'w')
        ujson.dump(cookies_json, cookies_json_file)
        cookies_json_file.close()
    except IOError:
        print "Error opening cookie.json file"
        return False
    except ValueError:
        print "Error dumping data to cookie.json file"
        return False
def dump_result(params, results, output_path):
    """Writes out a single result .json file in output_path.

    Parameters
    ----------
    params : dict
        Dictionary of parameter names and values
    results : dict
        Dictionary of an alignment result
    output_path : str
        Where to write out the json file
    """
    # Make a copy of params to avoid writing in-place below
    params = dict(params)
    # ujson can't handle infs, so we need to replace them manually:
    if params['norm'] == np.inf:
        params['norm'] = str(np.inf)
    # Convert params dict to a string of the form
    # param1_name_param1_value_param2_name_param2_value...
    param_string = "_".join(
        '{}_{}'.format(name, value) if type(value) != float
        else '{}_{:.3f}'.format(name, value)
        for name, value in params.items())
    # Construct a path where the .json results file will be written
    output_filename = os.path.join(output_path, "{}.json".format(param_string))
    # Store this result
    try:
        with open(output_filename, 'wb') as f:
            json.dump({'params': params, 'results': results}, f)
    # Ignore "OverflowError"s raised by ujson; they correspond to inf/NaN
    except OverflowError:
        pass
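The OverflowError comment above refers to ujson refusing non-finite floats; a small illustration of that behaviour and of the str() workaround the function applies to params['norm'] (shown here with ujson directly, following the description in the snippet):

import ujson

try:
    ujson.dumps({'norm': float('inf')})
except OverflowError:
    # ujson raises rather than emitting Infinity, hence the str(np.inf) substitution above
    pass

# After the substitution the value serializes as a plain string, e.g. '{"norm":"inf"}'
print(ujson.dumps({'norm': str(float('inf'))}))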
def combine_dicts():
    with open('title10to100000.json') as tag200, open('title100000plus.json') as tag1500:
        tag200dict = ujson.load(tag200)
        tag500dict = ujson.load(tag1500)
        newdict = dict(chain(tag200dict.items(), tag500dict.items()))
        with open('titletagwords.json', 'w') as write:
            ujson.dump(newdict, write)
def run(self):
    while True:
        sleep(60)
        summary = self.statsCollector.getSummary()
        self.logger.info("Statistics update: {0}".format(summary))
        with open(self.fileName, 'w') as f:
            ujson.dump(summary, f)
def save(self, data):
    """Save data to file.

    Careful, this overwrites any existing data on file.
    Use self.update() to perform partial updates.
    """
    json.dump(data, open(self.path, 'w'))
def _update_local(fname, qparams, replacement):
    """Update a document created using the local framework

    Parameters
    -----------
    fname: str
        Name of the query should be run
    qparams: dict
        Query parameters. Similar to online query methods
    replacement: dict
        Fields/value pair to be updated. Beware of disallowed fields
        such as time and uid
    """
    try:
        with open(fname, 'r') as fp:
            local_payload = ujson.load(fp)
        qobj = mongoquery.Query(qparams)
        for _sample in local_payload:
            try:
                if qobj.match(_sample):
                    for k, v in replacement.items():
                        _sample[k] = v
            except mongoquery.QueryError:
                pass
        with open(fname, 'w') as fp:
            ujson.dump(local_payload, fp)
    except FileNotFoundError:
        raise RuntimeWarning('Local file {} does not exist'.format(fname))
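A hedged usage sketch of _update_local; the file name, query and replacement fields are illustrative only, with the query written in the Mongo-style filter syntax that mongoquery.Query matches against each document.

# Illustrative call: set 'container' to 'NULL' on every document named 'ref'.
# 'samples.json' and the field names are assumptions for the example.
_update_local('samples.json',
              qparams={'name': 'ref'},
              replacement={'container': 'NULL'})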
def __init__(self, path, writer_queue=None):
    """Initialize using path to file and optional thread-safe queue.

    Queue is used for json serializable data to be written to file when
    self.write_queued() is called. If the file at 'path' doesn't exist it
    will be created.
    """
    self.path = os.path.realpath(os.path.expanduser(path))
    if not os.path.exists(self.path):
        print("Persistence file %s does not exist yet, creating it..." % self.path)
        json.dump({}, open(self.path, 'w'))
    else:
        # check for json-ness
        try:
            json.load(open(self.path))
            LOG.debug("Loaded existing persistence file %s.", os.path.relpath(self.path))
        except ValueError as err:
            raise ValueError("The persistence file -> %s is not "
                             "a valid json file. | %s"
                             % (os.path.relpath(self.path), err))
    if writer_queue and not isinstance(writer_queue, Queue.Queue):
        raise TypeError('writer_queue should be a Queue.Queue.')
    elif writer_queue:
        self.synq = writer_queue
        self.synq._persisted = set()
    else:
        self.synq = None
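A hedged construction example for the initializer above; the class name Persister is a placeholder (the snippet only shows __init__), and Queue.Queue matches the Python 2 style isinstance check in the code.

import Queue  # Python 2 queue module, matching the isinstance check above

writer_queue = Queue.Queue()
store = Persister('~/.myapp/state.json', writer_queue=writer_queue)  # Persister is a placeholder name
writer_queue.put({'last_run': '2016-01-01'})
# A later call to store.write_queued() would flush queued items to the JSON file.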
def main(argv):
    args = docopt(__doc__, argv=argv)

    params = dict(p.split(':') for p in args['--parameters'])

    # format sort parameters.
    if args['--sort']:
        for i, field in enumerate(args['--sort']):
            key = 'sort[{0}]'.format(i)
            params[key] = field.strip().replace(':', ' ')

    query = ' '.join(args['<query>'])

    if args['--itemlist']:
        fields = ['identifier']
    else:
        fields = args['--field']

    search = search_items(query, fields=args['--field'], params=params, v2=args['--v2'])

    if args['--number-found']:
        sys.stdout.write('{0}\n'.format(search.num_found))
        sys.exit(0)
    for result in search:
        try:
            if args['--itemlist']:
                sys.stdout.write(result.get('identifier', ''))
            else:
                json.dump(result, sys.stdout)
            sys.stdout.write('\n')
        except IOError:
            sys.exit(0)
def write_json(filename, dataset):
    with codecs.open(filename, mode="w", encoding="utf-8") as f:
        ujson.dump(dataset, f)
def save(self):
    with open(PATH, "w") as outfile:
        ujson.dump(self.db, outfile, indent=4)
def __init__(self):
    if not os.path.exists(PATH):
        d = {}
        ujson.dump(d, open(PATH, "w"))
    self.db = ujson.load(open(PATH))
bmp180.oversample_sett = 2
bmp180.baseline = 101325
alt_init = bmp180.altitude

# BNO055
bno055 = BNO055(I2C_bus)
pitch_init = bno055.readEuler().angle_x
pitch_init = pitch_init+180 if pitch_init <= 0 else pitch_init-180
roll_init = bno055.readEuler().angle_y

# Write to file
with open("/Web/www/autre.json", "r") as f:
    autre_file = ujson.load(f)
with open("/Web/www/autre.json", "w") as f:
    autre_file[2]["data"]["target"] = pitch_init
    autre_file[1]["data"]["target"] = roll_init
    ujson.dump(autre_file, f)

# Airspeed
speed = airspeed(bmp180)
speed_init = 10

# PID controllers
pitch_pid = pid(0, 0, 0)
roll_pid = pid(0, 0, 0)
speed_pid = pid(0, 0, 0)

# Read configuration
pid_counter = 0
#config.readConfig(pitch_pid, roll_pid, speed_pid)

# Setup mode object
def save_cm(results, num_classes):
    labels = [i for i in range(num_classes)]
    cm = confusion_matrix(results["labels"], results["predicts"], labels=labels)

    data = []
    for target_index, target_row in enumerate(cm):
        for predicted_index, count in enumerate(target_row):
            data.append((labels[target_index], labels[predicted_index], count))

    df_cm = pd.DataFrame(data, columns=['target', 'predicted', 'count'])
    cm_file = '/confusion_matrix.csv'
    with open(cm_file, 'w') as f:
        df_cm.to_csv(f, columns=['target', 'predicted', 'count'], header=False, index=False)

    lines = ''
    with open(cm_file, 'r') as f:
        lines = f.read()

    metadata = {
        'outputs': [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'},
            ],
            'source': lines,
            'storage': 'inline',
            'labels': list(map(str, labels)),
        }]
    }
    with open("/mlpipeline-ui-metadata.json", 'w') as f:
        ujson.dump(metadata, f)

    accuracy = accuracy_score(results["labels"], results["predicts"])
    send_manage(accuracy)
    metrics = {
        'metrics': [{
            'name': 'accuracy-score',
            'numberValue': accuracy,
            'format': "PERCENTAGE",
        }]
    }
    with open('/accuracy.json', 'w') as f:
        ujson.dump(accuracy, f)
    with open('/mlpipeline-metrics.json', 'w') as f:
        ujson.dump(metrics, f)
def dump_pool_config():
    with open(_pool_config_file, 'w', encoding='utf8') as f:
        json.dump(_group_pool, f, ensure_ascii=False)
def write_settings(self):
    ujson.dump(self.__dict__, open(self.settings_path, 'w+'))
def __init__(self):
    if not os.path.exists(PATH):
        with open(PATH, "w") as f_x:
            ujson.dump({}, f_x)
    with open(PATH) as yt_db:
        self.db = ujson.load(yt_db)
def run_game(game, dockers, args, sock_file, scrimmage=False):
    '''
    This contains the logic that needs to be cleaned up at the end of a game.
    If there is something that needs to be cleaned up, add it in the try/finally
    block surrounding the call of the function.
    '''

    # Start the unix stream server
    main_server = server.start_server(sock_file, game, dockers)
    viewer_server = server.start_viewer_server(PORT, game)

    try:
        # Start the docker instances
        for player_key in dockers:
            docker_inst = dockers[player_key]
            docker_inst.start()
            for player_ in game.players:
                if player_['id'] == player_key:
                    player = player_['player']
                    break
            if player.planet == bc.Planet.Earth:
                planet = 'earth'
            else:
                planet = 'mars'
            if player.team == bc.Team.Blue:
                team = 'blue'
            else:
                team = 'red'
            name = '[{}:{}]'.format(planet, team)
            # 10 MB of logs in scrimmage, unlimited logging otherwise
            logger = Logger(name, print=not args['terminal_viewer'],
                            limit=10**7 if scrimmage else 2**63)
            docker_inst.stream_logs(line_action=logger)
            player_['logger'] = logger

        # Wait until all the code is done then clean up
        while not game.game_over:
            time.sleep(0.1)

    finally:
        main_server.shutdown()
        try:
            main_server.server_close()
        except Exception as e:
            print(e)
        if viewer_server is not None:
            viewer_server.shutdown()

    match_file = {}
    match_file['message'] = game.viewer_messages
    if not game.disconnected:
        if bc.Team.Red == game.manager.winning_team():
            winner = 'player1'
        else:
            winner = 'player2'
    else:
        winner = game.winner

    match_file['metadata'] = {
        'player1': 'player1' if scrimmage else args['dir_p1'][8:],
        'player2': 'player2' if scrimmage else args['dir_p2'][8:],
        'winner': winner
    }

    if args['docker']:
        match_output = abspath(os.path.join('/player', str(args['replay_filename'])))
    else:
        match_output = args['replay_filename']
        if not os.path.isabs(match_output):
            match_output = abspath(os.path.join('..', str(match_output)))

    if not scrimmage:
        print("Saving replay to", match_output)
        match_ptr = open(match_output, 'w')
        json.dump(match_file, match_ptr)
        match_ptr.close()
        return winner
    else:
        return winner, match_file
def run_lighthouse_test(self, task): """Run a lighthouse test against the current browser session""" task['lighthouse_log'] = '' if 'url' in self.job and self.job['url'] is not None: self.job['shaper'].configure(self.job, task) output_path = os.path.join(task['dir'], 'lighthouse.json') json_file = os.path.join(task['dir'], 'lighthouse.report.json') json_gzip = os.path.join(task['dir'], 'lighthouse.json.gz') html_file = os.path.join(task['dir'], 'lighthouse.report.html') html_gzip = os.path.join(task['dir'], 'lighthouse.html.gz') time_limit = min(int(task['time_limit']), 80) command = [ 'lighthouse', '"{0}"'.format(self.job['url']), '--disable-network-throttling', '--disable-cpu-throttling', '--throttling-method', 'provided', '--enable-error-reporting', '--max-wait-for-load', str(int(time_limit * 1000)), '--port', str(task['port']), '--output', 'html', '--output', 'json', '--output-path', '"{0}"'.format(output_path) ] if self.job['keep_lighthouse_trace']: command.append('--save-assets') if self.options.android or 'mobile' not in self.job or not self.job[ 'mobile']: command.append('--disable-device-emulation') if 'user_agent_string' in self.job: sanitized_user_agent = re.sub( r'[^a-zA-Z0-9_\-.;:/()\[\] ]+', '', self.job['user_agent_string']) command.append( '--chrome-flags="--user-agent=\'{0}\'"'.format( sanitized_user_agent)) if len(task['block']): for pattern in task['block']: pattern = "'" + pattern.replace("'", "'\\''") + "'" command.extend(['--blocked-url-patterns', pattern]) if 'headers' in task: headers_file = os.path.join(task['dir'], 'lighthouse-headers.json') with open(headers_file, 'wb') as f_out: json.dump(task['headers'], f_out) command.extend( ['--extra-headers', '"{0}"'.format(headers_file)]) cmd = ' '.join(command) self.lighthouse_command = cmd # Give lighthouse up to 10 minutes to run all of the audits try: lh_thread = threading.Thread(target=self.lighthouse_thread) lh_thread.start() lh_thread.join(600) except Exception: pass from .os_util import kill_all kill_all('node', True) self.job['shaper'].reset() # Rename and compress the trace file, delete the other assets if self.job['keep_lighthouse_trace']: try: lh_trace_src = os.path.join(task['dir'], 'lighthouse-0.trace.json') if os.path.isfile(lh_trace_src): # read the JSON in and re-write it line by line to match the other traces with open(lh_trace_src, 'rb') as f_in: trace = json.load(f_in) if trace is not None and 'traceEvents' in trace: lighthouse_trace = os.path.join( task['dir'], 'lighthouse_trace.json.gz') with gzip.open(lighthouse_trace, 'wb', 7) as f_out: f_out.write('{"traceEvents":[{}') for trace_event in trace['traceEvents']: f_out.write(",\n") f_out.write(json.dumps(trace_event)) f_out.write("\n]}") except Exception: pass # Delete all the left-over lighthouse assets files = glob.glob(os.path.join(task['dir'], 'lighthouse-*')) for file_path in files: try: os.remove(file_path) except Exception: pass if os.path.isfile(json_file): # Remove the raw screenshots if they were stored with the file lh_report = None with open(json_file, 'rb') as f_in: lh_report = json.load(f_in) modified = False if lh_report is not None and 'audits' in lh_report: if 'screenshots' in lh_report['audits']: del lh_report['audits']['screenshots'] modified = True if 'screenshot-thumbnails' in lh_report['audits']: del lh_report['audits']['screenshot-thumbnails'] modified = True if modified: with gzip.open(json_gzip, 'wb', 7) as f_out: json.dump(lh_report, f_out) else: with open(json_file, 'rb') as f_in: with gzip.open(json_gzip, 'wb', 7) as f_out: 
shutil.copyfileobj(f_in, f_out) try: os.remove(json_file) except Exception: pass # Extract the audit scores if lh_report is not None: audits = {} # v1.x if 'aggregations' in lh_report: for entry in lh_report['aggregations']: if 'name' in entry and 'total' in entry and \ 'scored' in entry and entry['scored']: name = entry['name'].replace(' ', '') audits[name] = entry['total'] # v2.x elif 'reportCategories' in lh_report: for category in lh_report['reportCategories']: if 'name' in category and 'score' in category: category_name = category['name'].replace( ' ', '') score = float(category['score']) / 100.0 audits[category_name] = score if category[ 'name'] == 'Performance' and 'audits' in category: for audit in category['audits']: if 'id' in audit and 'group' in audit and \ audit['group'] == 'perf-metric' and \ 'result' in audit and \ 'rawValue' in audit['result']: name = category_name + '.' + \ audit['id'].replace(' ', '') audits[name] = audit['result'][ 'rawValue'] # v3.x elif 'categories' in lh_report: for categoryId in lh_report['categories']: category = lh_report['categories'][categoryId] if 'title' not in category or 'score' not in category: continue category_title = category['title'].replace(' ', '') audits[category_title] = category['score'] if categoryId != 'performance' or 'auditRefs' not in category: continue for auditRef in category['auditRefs']: if auditRef['id'] not in lh_report['audits']: continue if 'group' not in auditRef or auditRef[ 'group'] != 'metrics': continue audit = lh_report['audits'][auditRef['id']] name = category_title + '.' + audit['id'] audits[name] = audit['rawValue'] audits_gzip = os.path.join(task['dir'], 'lighthouse_audits.json.gz') with gzip.open(audits_gzip, 'wb', 7) as f_out: json.dump(audits, f_out) if os.path.isfile(html_file): # Remove the raw screenshots if they were stored with the file with open(html_file, 'rb') as f_in: lh_report = f_in.read() start = lh_report.find('\n "screenshots') if start >= 0: end = lh_report.find('\n },', start) if end >= 0: lh_report = lh_report[:start] + lh_report[end + 7:] with gzip.open(html_gzip, 'wb', 7) as f_out: f_out.write(lh_report) try: os.remove(html_file) except Exception: pass
def crawl(url_list, procId):
    posts = []
    t = time.time()
    for i, url in enumerate(url_list):
        while True:
            res = requests.get(url)
            if res.status_code == 200:
                break
            else:
                time.sleep(0.5)
        soup = BeautifulSoup(res.text, 'lxml')
        articles = soup.find('div', {'id': 'bodyarea'}).findAll('td', {'class': ['windowbg', 'windowbg2']})
        post_article = articles[0]
        comment_articles = articles[1:]

        post = {}
        try:
            post['post_user'] = post_article.find('td', 'poster_info').find('b').text
            post_article = post_article.find('td', 'td_headerandpost')
            post_article_meta = post_article.find('table').findAll('div')
            post['title'] = post_article_meta[0].text.strip()
            posted_time = post_article_meta[1].text
            if 'Today' in posted_time:
                today = datetime.today()
                post['posted_time'] = today.strftime("%B %d, %Y,") + posted_time.split("at")[1]
            else:
                post['posted_time'] = posted_time
            post['post_body'] = post_article.find('div', 'post').text
        except:
            # poll
            continue

        comment_list = []
        for comment_article in comment_articles:
            one_comment = {}
            try:
                one_comment['post_user'] = comment_article.find('td', 'poster_info').find('b').text
            except:
                print(url)
                print(comment_article)
            comment_article = comment_article.find('td', 'td_headerandpost')
            post_body = comment_article.find('div', 'post').text
            if post_body.isdigit():  # empty comment?
                continue
            one_comment['post_body'] = post_body
            comment_article_meta = comment_article.find('table').findAll('div')
            one_comment['title'] = comment_article_meta[0].text.strip()
            posted_time = comment_article_meta[1].text
            if 'Today' in posted_time:
                today = datetime.today()
                one_comment['posted_time'] = today.strftime("%B %d, %Y,") + posted_time.split("at")[1]
            else:
                one_comment['posted_time'] = posted_time
            comment_list.append(one_comment)

        page_base_url = url.rpartition(".")[0]
        current_comment_num = 20
        prev_comment_page = '1'
        while True:
            time.sleep(0.3)
            comment_page_url = "%s.%d" % (page_base_url, current_comment_num)
            while True:
                res_comment = requests.get(comment_page_url)
                if res_comment.status_code == 200:
                    break
                else:
                    time.sleep(0.5)
            soup_comment = BeautifulSoup(res_comment.text, 'lxml')
            current_page = soup_comment.find('div', {'id': 'bodyarea'}).find('table').find('b').text
            if current_page == prev_comment_page:
                break
            else:
                prev_comment_page = current_page
                current_comment_num += 20
            for comment_article in soup_comment.findAll('article'):
                one_comment = {}
                one_comment['post_user'] = comment_article.find('td', 'poster_info').find('b').text
                comment_article = comment_article.find('td', 'td_headerandpost')
                post_body = comment_article.find('div', 'post').text
                if post_body.isdigit():  # empty comment?
                    continue
                one_comment['post_body'] = post_body
                comment_article_meta = comment_article.find('table').findAll('div')
                one_comment['title'] = comment_article_meta[0].text.strip()
                posted_time = comment_article_meta[1].text
                if 'Today' in posted_time:
                    today = datetime.today()
                    one_comment['posted_time'] = today.strftime("%B %d, %Y,") + posted_time.split("at")[1]
                else:
                    one_comment['posted_time'] = posted_time
                comment_list.append(one_comment)

        post['comments'] = comment_list
        posts.append(post)

        if i % 50 == 0:
            t = time.time() - t
            print(f"{procId} - {i+1}/{len(url_list)}, {t:.2f} seconds")
            t = time.time()
        if i > 0 and i % 1000 == 0:
            with open(f"bitcoin/bitcoin_forum_{procId}_{i//1000}.json", 'w') as f:
                json.dump(posts, f)
            posts = []
            time.sleep(120)
        time.sleep(1)

    if len(posts) > 0:
        with open(f"bitcoin/bitcoin_forum_{procId}_last.json", 'w') as f:
            json.dump(posts, f)
def save(self, path):
    with path.open(mode='w') as f:
        json.dump(self.config, f)
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
    with open(filename, "w") as fh:
        json.dump(obj, fh)
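A hedged usage example of the helper above; the filename, data and message are illustrative only.

# Prints "Saving word counts..." and then writes word_counts.json.
word_counts = {"the": 1042, "of": 731}   # illustrative data
save("word_counts.json", word_counts, message="word counts")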
users = getUsersFromUserFriendsAsc(1)
res = []
usersCount = users.count()
for user in users:
    data = {}
    userId = user['user_id']
    data = getUserLocation(userId)
    data["user_id"] = userId
    data["friends"] = []
    i = 0
    for f in user["friends"]:
        if f != userId:
            friend = {}
            friend = getUserLocation(f)
            # friend["user_id"] = f
            # print friend
            data["friends"].append(friend)
    res.append(data)

# from pymongo import MongoClient
#
# client = MongoClient()
#
# db = client['yelp']
#
# db.user_test.insert_many(res)
#
with open('../../static/json/user_friends_location_data.json', 'w+') as outfile:
    ujson.dump(res, outfile)
def dump(self, filename):
    with open(filename, 'w', encoding='utf8') as f:
        json.dump(self._data, f, ensure_ascii=False)
def _write( datasetmeta, experiment_name, feature_names, label_names, nesting, numclasses, part_size, samplefetcher, tensorgetter, persisters={}): totalrows = 0 eof = False while not eof: samples = [] hashvariable = sha256() ids = [] if callable(part_size): # Estimate sample size and calculate optimal part size try: sample = samplefetcher() except StopIteration: raise Exception('Trying to generate an empty dataset') sampleid = str(sample.entityid) ids.append(sampleid) hashvariable.update(sampleid) samples.append(sample) part_size = part_size(sample) for _ in xrange(0, part_size): try: sample = samplefetcher() sampleid = str(sample.entityid) ids.append(sampleid) hashvariable.update(sampleid) samples.append(sample) except StopIteration: eof = True break except Exception as e: logging.exception(e.message) if len(samples) == 0: break if Settings.EnsureUniqueEntitiesInDataset and len(ids) > len(set(ids)): raise Exception('String representations of sample ids are not unique') totalrows += len(samples) digest = hashvariable.hexdigest() partdir = 'input/%s' % experiment_name h_idx = 0 for nest in nesting: partdir += '/' + digest[h_idx: h_idx + nest] h_idx += nest partdir += '/part_%s' % digest if os.path.isdir(partdir): with open(partdir + '/part.json', 'r') as f: partmeta = json.load(f) partexists = True else: partexists = False partmeta = { 'bytesize': 0, 'numsamples': len(samples), 'unordered_features': [] } os.makedirs(partdir) if partexists: # because conversion from sample to X takes time, we don't perform it, if there is already a cached # part on the disk. This is especially handy in the case when dataset processing had terminated due to # a bug in some feature, so you have to restart it. features_to_get = [] for feature in feature_names: featurefile = '%s/%s' % (partdir, DataSet.fname(feature)) if not os.path.isfile(featurefile): features_to_get.append(feature) else: features_to_get = feature_names if len(features_to_get) > 0: for feature in features_to_get: featurefile = '%s/%s' % (partdir, DataSet.fname(feature)) x = tensorgetter(samples, feature) x[np.isnan(x)] = BaseFeature.MISSING_VALUE try: for ff in datasetmeta['features']: if ff['name'] == feature: if len(ff['output_shape']) == 0: cntr = ff['top10values'][0] cntr.update(x) if len(cntr) > 10: ff['top10values'][0] = Counter(dict(cntr.most_common(10))) elif len(ff['output_shape']) == 1: for i in xrange(0, ff['output_shape'][0]): cntr = ff['top10values'][i] cntr.update(x[:, i]) if len(cntr) > 10: ff['top10values'][i] = Counter(dict(cntr.most_common(10))) else: cntr = ff['top10values'][0] cntr.update([np.mean(x)]) if len(cntr) > 10: ff['top10values'][0] = Counter(dict(cntr.most_common(10))) break except: logging.info('Cannot calculate top10values ' + traceback.format_exc()) if feature in persisters: persisters[feature].save(featurefile, x) else: with open(featurefile, 'wb') as f: np.save(f, x) if feature not in partmeta['unordered_features']: partmeta['unordered_features'].append(feature) partmeta['bytesize'] += sys.getsizeof(x) for label in label_names: labelfile = '%s/Label-%s' % (partdir, DataSet.fname(label)) x = tensorgetter(samples, label) x[np.isnan(x)] = BaseFeature.MISSING_VALUE if label in persisters: persisters[label].save(labelfile, x) else: with open(labelfile, 'wb') as f: np.save(f, x) if numclasses > 0 and len(label_names) == 1 and 'class_counts' in datasetmeta: if len(x.shape) == 1: for cls in xrange(0, numclasses): klass = 'Class_' + str(cls) datasetmeta['class_counts'][klass] += sum(1 for y in x if y == cls) elif 
len(x.shape) == 2 and x.shape[1] == 1: for cls in xrange(0, numclasses): klass = 'Class_' + str(cls) datasetmeta['class_counts'][klass] += sum(1 for y in x if y[0] == cls) else: for cls in xrange(0, numclasses): klass = 'Class_' + str(cls) datasetmeta['class_counts'][klass] += sum(x[:, cls]) partmeta['bytesize'] += sys.getsizeof(x) if not os.path.isfile(partdir + '/ids.txt'): with open(partdir + '/ids.txt', 'wb') as f: f.writelines([x + "\n" for x in ids]) with open(partdir + '/part.json', 'w') as f: json.dump(partmeta, f) logging.info('%s stored or updated. In total %d rows generated' % (partdir, totalrows)) with open('input/%s/dataset_V5.json' % experiment_name, 'w') as f: json.dump(datasetmeta, f) sollfeatures = set([x['name'] for x in datasetmeta['features']]) for entry in scandir('input/%s' % experiment_name): if entry.is_dir() and entry.name.startswith('part_'): metafile = 'input/%s/%s/part.json' % (experiment_name, entry.name) if os.path.isfile(metafile): with open(metafile, 'r') as f: meta = json.load(f) ist = set(meta['unordered_features']) missing = sollfeatures.difference(ist) if len(missing) > 0: logging.warning('%s does not contain following features: %s ' % (entry, str(missing))) x = input( 'Press y to remove the part, any other key to leave it (in this case missing feature will always have missing values)') if x == 'y': shutil.rmtree('input/%s/%s' % (experiment_name, entry)) with open('input/%s/dataset_V5.json' % experiment_name, 'w') as f: json.dump(datasetmeta, f) for ff in datasetmeta['features']: for v in ff['top10values']: if len(v) == 0: logging.warning('Feature %s has no values' % ff['name']) elif len(v) == 1: if v.most_common(1)[0][0] == BaseFeature.MISSING_VALUE: logging.warning('Feature %s has only missing values' % ff['name']) else: logging.warning('Feature %s has only one value %s' % (ff['name'], v.most_common(1)[0][0])) elif v.most_common(1)[0][1] > 0.99 * totalrows: logging.warning('Feature %s has the value %s in more than 99%% of samples' % (ff['name'], v.most_common(1)[0][ 0])) if 'class_counts' in datasetmeta: notpresent = [] lessthancent = [] for k, v in datasetmeta['class_counts'].iteritems(): if v == 0: notpresent.append(str(k)) if v < 0.01 * totalrows: lessthancent.append(str(k)) if len(notpresent) > 0 or len(lessthancent) > 0: raise Exception('There is a class distribution problem. Following classes ' 'are not present in the dataset: %s. Following classes ' 'contribute to less than 1%% of dataset: %s' % (','.join(notpresent), ','.join(lessthancent))) return totalrows
def mqtt(): mainOctopus() print("Hello, this will help you initialize MQTT client") print("ver: " + ver + " (c)octopusLAB") print("id: " + esp_id) print("Press Ctrl+C to abort") # TODO improve this # prepare directory if 'config' not in uos.listdir(): uos.makedirs('config') run = True while run: sele = setupMenu() if sele == "x": print("Setup - exit >") time.sleep_ms(2000) print("all OK, press CTRL+D to soft reboot") run = False if sele == "si": #system_info() from util.sys_info import sys_info sys_info() if sele == "cv": print("------- Set 0/1/str for settings ------") wc = {} wc['name'] = input("device (host)name/describe: ") wc['time'] = int(input("get time from server? [1/0]: ")) wc['mysql'] = int(input("send data to mysql db [1/0]: ")) if wc['mysql']: wc['mysqlURL'] = input("mysql Write URL: ") wc['mqtt'] = int(input("mqtt client [1/0]: ")) wc['influx'] = int(input("send data to influx db [1/0]: ")) if wc['influx']: wc['influxWriteURL'] = input("influx Write URL: ") wc['timer'] = int(input("timer: ")) print("Writing to file config/mqtt_io.json") with open('config/mqtt_io.json', 'w') as f: ujson.dump(wc, f) if sele == "ms": print("Set mqtt >") print() mq = {} mq['mqtt_broker_ip'] = input("BROKER IP: ") mq['mqtt_ssl'] = int(input("> SSL (0/1): ")) mq['mqtt_port'] = int(input("> PORT (1883/8883/?): ")) mq['mqtt_clientid_prefix'] = input("CLIENT PREFIX: ") mq_user = input("Username: "******"" else mq_user mq_pass = input("Password: "******"" else mq_pass mq['mqtt_root_topic'] = input("ROOT TOPIC: ") print("Writing to file config/mqtt.json") with open('config/mqtt.json', 'w') as f: ujson.dump(mq, f) def mqtt_sub(topic, msg): print("MQTT Topic {0}: {1}".format(topic, msg)) if sele == "mt": print("mqtt simple test:") print("wifi_config >") wifi = WiFiConnect(250) wifi.events_add_connecting(connecting_callback) wifi.events_add_connected(connected_callback) print("wifi.connect") wifi_status = wifi.connect() # url config: TODO > extern. print("mqtt_config >") mqtt_clientid_prefix = read_mqtt_config()["mqtt_clientid_prefix"] mqtt_host = read_mqtt_config()["mqtt_broker_ip"] mqtt_root_topic = read_mqtt_config()["mqtt_root_topic"] mqtt_ssl = read_mqtt_config()["mqtt_ssl"] mqtt_user = read_mqtt_config()["mqtt_user"] mqtt_pass = read_mqtt_config()["mqtt_pass"] mqtt_clientid = mqtt_clientid_prefix + esp_id c = MQTTClient(mqtt_clientid, mqtt_host, ssl=mqtt_ssl, user=mqtt_user, password=mqtt_pass) c.set_callback(mqtt_sub) print("mqtt.connect to " + mqtt_host) c.connect() """ # c.subscribe("/octopus/device/{0}/#".format(esp_id)) subStr = mqtt_root_topic+"/"+esp_id+"/#" print("subscribe (root topic + esp id):" + subStr) c.subscribe(subStr) """ mqtt_log_topic = mqtt_root_topic + "/log" print("mqtt log > " + mqtt_log_topic) print(mqtt_log_topic) # mqtt_root_topic_temp = "octopus/device" c.publish(mqtt_log_topic, esp_id) # topic, message (value) to publish
def save_time(self):
    file = open(TIME_FILE, "w")
    ujson.dump(utime.localtime(), file)
    file.close()
    return
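A matching read-back sketch, assuming the same TIME_FILE constant and MicroPython-style ujson module used above; the load_time name and the list-shaped return value are assumptions for illustration.

def load_time(self):
    # Read back the time tuple written by save_time(); ujson returns it as a list.
    file = open(TIME_FILE, "r")
    saved = ujson.load(file)
    file.close()
    return saved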
                element.xpath(
                    ".//div[@class='company-title']/a/text()")[0].strip(),
                #'tel': element.xpath(".//div[@class='legal-person']/span[@class='margin-r-1x']/text()")[0].strip(),
                'legal_owner': element.xpath(
                    ".//div[@class='legal-person']/text()")[0].strip(),
                #'address': element.xpath(".//div[@class='legal-person'][1]/span/text()")[0].strip(),
                'status': element.xpath(".//div[@class='company-tags']/span/text()")[0].strip(),
                'capital': element.xpath(".//div[contains(@class, 'col-3-1')]/text()")[0].strip(),
                'date': element.xpath(".//div[contains(@class, 'col-3-2')]/text()")[0].strip()
                #'url': element.xpath(".//div[@class='company-title']/a/@href")[0].strip()
            })
        time.sleep(10)
        return result


if __name__ == "__main__":
    fetcher = Fetcher()
    fetcher.login()
    time.sleep(5)
    html = fetcher.get_page()
    ujson.dump(html, open('result.json', 'w'))
    fetcher.close_driver()
        auto.add_word(word, word)
    auto.make_automaton()

    for word in tqdm(words):
        for end_ind, found in auto.iter(word):
            if found in mapping:
                mapping[found].append(word)
            else:
                mapping[found] = [word]

    return mapping


cedict = pd.read_csv(f"./data/intermediate/cedict.txt", sep="\t", index_col=0)

simplified_words = list(cedict["simplified"])
traditional_words = list(cedict["traditional"])

simplified_char_to_word = characters_to_words(simplified_words)
traditional_char_to_word = characters_to_words(traditional_words)

with gzip.open("./data/intermediate/simplified_containing_words.json.zip",
               "wt", encoding="utf-8") as f:
    ujson.dump(simplified_char_to_word, f)

with gzip.open("./data/intermediate/traditional_containing_words.json.zip",
               "wt", encoding="utf-8") as f:
    ujson.dump(traditional_char_to_word, f)
    else:
        for dataset in [train_data, test_data, eval_data]:
            dataset.map_items(tokenizer, final_url_ids, final_publication_ids, filter=False)
        print("Items mapped")
        mapped_data_path = Path(args.data_dir) / "mapped-data"
        if not mapped_data_path.is_dir():
            mapped_data_path.mkdir()
        train_mapped_path = mapped_data_path / "train.json"
        test_mapped_path = mapped_data_path / "test.json"
        eval_mapped_path = mapped_data_path / "evaluation.json"
        with open(train_mapped_path, "w") as file:
            json.dump(train_data.examples, file)
        with open(test_mapped_path, "w") as file:
            json.dump(test_data.examples, file)
        with open(eval_mapped_path, "w") as file:
            json.dump(eval_data.examples, file)
        print(f"Mapped Data saved to {mapped_data_path} directory")

    # create weights for dataset samples to ensure only positive and negative
    # examples are chosen in respective samples
    pos_sampler = train_data.create_positive_sampler(args.target_publication)
    neg_sampler = train_data.create_negative_sampler(args.target_publication)

    train_batch_sampler = sampler_util.BatchSamplerWithNegativeSamples(
        pos_sampler=pos_sampler,
        neg_sampler=neg_sampler,
        items=train_data.examples,
        batch_size=args.batch_size,
def dump_user_collection(uid: str, ucollection):
    with open(os.path.join(_collection_path, f'{uid}.json'), 'w', encoding='utf8') as f:
        json.dump(ucollection, f, ensure_ascii=False)
        f.close()
def writefile(self, data):
    with open(self._filename, 'w') as fh:
        ujson.dump(data, fh, indent=4)
    return True
async def get_wiki_article(self, wiki_title: str) -> WikiArticle: # Note client is responsible for rate limiting as needed if self.cache_dir is not None: tokenized_file = self._get_tokenized_filename(wiki_title) if exists(tokenized_file): log.info("Load wiki article for \"%s\" from cache", wiki_title) with open(tokenized_file, "r") as f: data = ujson.load(f) return WikiArticle(data["title"], data["url"], [WikiParagraph.from_json(x) for x in data["paragraphs"]]) log.info("Load wiki article for \"%s\"", wiki_title) async with ClientSession() as sess: # Use int(self.follow_redirects) since this get method doesn't support # bool values for some reason async with sess.get(url=WIKI_API, params=dict(action="parse", page=wiki_title, redirects=int(self.follow_redirects), format="json")) as resp: data = await resp.json() raw_data = data["parse"] # Extract paragraph based on HTML tags # Wiki html is pretty structured, so this seems to work reasonable well soup = BeautifulSoup(raw_data["text"]["*"], "lxml") paragraphs = [] to_find = ["p", "h2", "h3", "h4", "h5", "h6"] if self.extract_lists: to_find += ["ul", "ol"] for element in soup.findAll(to_find): if element.name[0] == "h": if element.get_text() == "Contents": continue sect_name = element.find(attrs={"class": "mw-headline"}).get_text() para = self._sent_to_paragraph(len(paragraphs), "section", [sect_name]) if para.n_tokens > 0: paragraphs.append(para) elif element.name == "ul" or element.name == "ol": if dict(element.parent.attrs).get("class") != ["mw-parser-output"]: # only extract "body" lists continue para = self._sent_to_paragraph(len(paragraphs), "list" if element.name == "ul" else "ordered_list", [x.get_text() for x in element.findAll("li")]) if para.n_tokens > 0: paragraphs.append(para) else: # remove citations for citation in element.findAll("sup", {"class": "reference"}): citation.extract() # remove citation needed for sub in element.findAll("sup"): citations = sub.findAll("a", href=True) if len(citations) == 1: citation = citations[0] href = citation["href"] if href.startswith("#cite") or href == "/wiki/Wikipedia:Citation_needed": sub.extract() text = element.get_text() para = self._text_to_paragraph(len(paragraphs), "paragraph", text) if para.n_tokens > 0: paragraphs.append(para) article = WikiArticle(wiki_title, raw_data["pageid"], paragraphs) if self.cache_dir is not None: # save to cache with open(tokenized_file, "w") as f: ujson.dump(dict(title=article.title, url=article.url, paragraphs=[x.to_json() for x in article.paragraphs]), f) return article
def store_params(learn_args):
    log(logger.info, 'Learning args: {}'.format(learn_args))
    with open('%s.json' % learn_args['model_prefix'], 'w') as fout:
        json.dump(learn_args, fout)
def save(filename, obj, message=None):
    if message is not None:
        print(f"Saving {message}...")
    with open(filename, "w") as fh:
        json.dump(obj, fh)
import pickle

import ujson as json
import pandas as pd

with open("output/db.pickle", "rb") as f:
    db = pickle.load(f)

json_data = {
    kk: {str(k[0]): v.data for k, v in vv.items()}
    for kk, vv in db.data.items()
}

with open('output/data_full.json', 'w') as f:
    json.dump(json_data, f, indent=2, sort_keys=True)

for kk, vv in json_data.items():
    df = pd.DataFrame.from_dict(vv, orient='index')
    df.to_csv("output/{}.csv".format(kk), index=False, na_rep='NA')
async def choice_cb(_, c_q: CallbackQuery): if not os.path.exists(PATH): await c_q.answer("𝑶𝒑𝒊𝒏𝒊𝒐𝒏 𝒅𝒂𝒕𝒂 𝒅𝒐𝒏'𝒕 𝒆𝒙𝒊𝒔𝒕 𝒂𝒏𝒚𝒎𝒐𝒓𝒆.", show_alert=True) return opinion_id = c_q.matches[0].group(2) ids = c_q.from_user.id counter = c_q.matches[0].group(1) with open(PATH) as f: data = ujson.load(f) view_data = data[str(opinion_id)] agree_data = "👍" disagree_data = "👎" if len(view_data) == 2: if str(ids) in view_data[0]: if view_data[0][str(ids)] == "y" and counter == "y": await c_q.answer("Already Voted for 👍", show_alert=True) return if view_data[0][str(ids)] == "n" and counter == "n": await c_q.answer("Already Voted for 👎", show_alert=True) return # Answering Query First then moving forward choice = _choice(counter) await c_q.answer(f"You Choose {choice}", show_alert=False) # if view_data[0][str(ids)] == "y" and counter == "n": agree = int(view_data[1]["agree"]) - 1 disagree = int(view_data[1]["disagree"]) + 1 view_data[1] = {"agree": agree, "disagree": disagree} view_data[0][str(ids)] = "n" if view_data[0][str(ids)] == "n" and counter == "y": agree = int(view_data[1]["agree"]) + 1 disagree = view_data[1]["disagree"] - 1 view_data[1] = {"agree": agree, "disagree": disagree} view_data[0][str(ids)] = "y" else: # Answering Query First then moving forward choice = _choice(counter) await c_q.answer(f"You Choose {choice}", show_alert=False) # new_id = {ids: counter} view_data[0].update(new_id) if counter == "y": agree = view_data[1]["agree"] + 1 disagree = view_data[1]["disagree"] if counter == "n": agree = view_data[1]["agree"] disagree = view_data[1]["disagree"] + 1 view_data[1] = {"agree": agree, "disagree": disagree} data[str(opinion_id)] = view_data with open(PATH, "w") as outfile: ujson.dump(data, outfile) else: if len(view_data) == 1: # Answering Query First then moving forward choice = _choice(counter) await c_q.answer(f"You Choose {choice}", show_alert=False) if counter == "y": view_data = [{ids: "y"}, {"agree": 1, "disagree": 0}] if counter == "n": view_data = [{ids: "n"}, {"agree": 0, "disagree": 1}] data[str(opinion_id)] = view_data with open(PATH, "w") as outfile: ujson.dump(data, outfile) agree_data += f" {view_data[1]['agree']}" disagree_data += f" {view_data[1]['disagree']}" opinion_data = [ [ InlineKeyboardButton(agree_data, callback_data=f"op_y_{opinion_id}"), InlineKeyboardButton(disagree_data, callback_data=f"op_n_{opinion_id}"), ], [ InlineKeyboardButton("📊 Stats", callback_data=f"opresult_{opinion_id}") ], ] try: await c_q.edit_message_reply_markup( reply_markup=InlineKeyboardMarkup(opinion_data)) except FloodWait as e: await asyncio.sleep(e.x) except BadRequest: return
def adapter(json_thing):
    response = Response(content_type="application/json")
    # We'll write to the response body "file" in hopes that it's faster than a
    # huge standard string. Absolutely 100% untested and uninvestigated.
    ujson.dump(json_thing, response.body_file)
    return response
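A hedged usage sketch; Response here is assumed to be the WebOb/Pyramid-style response object implied by content_type and body_file, and the exact byte layout of the resulting body is not verified (the original comment itself flags this path as untested).

# Illustrative call: serialize a dict straight into the response body stream.
resp = adapter({"status": "ok", "count": 3})
# resp.body should now hold the ujson-encoded payload for the JSON response.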
def save_json(self, fname):
    with open(os.path.join(self.quantitative_dir, fname), 'w') as fp:
        ujson.dump(self.agent_data, fp, sort_keys=True, indent=4)