def getlocals(steamhome, logindata, gamedata, localdata):
    logindata["configdata"] = {}
    for root, subFolders, files in os.walk(os.path.join(steamhome["path"], "userdata", logindata["dir"])):
        for file in files:
            if file.lower().endswith("config.vdf"):
                vdfdata = utils.vdf.load(open(os.path.join(steamhome["path"], "userdata", root, file)))
                logindata["configdata"] = utils.merge(logindata["configdata"], vdfdata)

    def getnewgamedata(appid, name):
        ret = {"appid": appid, "name": name}
        if int(appid) <= 999999:  # not a shortcut
            ret["firstseen"] = int(time.time())
        return ret

    purchaseddata = purchased.getpurchased(logindata)
    localdb = utils.merge(shortcuts.getshortcuts(steamhome, logindata), apppackages.getappinfo(steamhome, logindata))
    localdata.clear()
    for g in localdb:
        if "data-isdlc" not in localdb[g]:
            localdata[g] = localdb[g]
            if g not in gamedata:
                gamedata[g] = getnewgamedata(g, localdb[g]["name"])
            if "data-packageid" in localdb[g] and localdb[g]["data-packageid"] in purchaseddata:
                gamedata[g]["firstseen"] = purchaseddata[localdb[g]["data-packageid"]]
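# Many of the snippets in this listing pass nested dicts to utils.merge
# (config data, download options, batch dicts). A minimal sketch of what such
# a recursive dict-merge helper might look like; this is an illustrative
# assumption, not the actual implementation any of these projects use.
def merge_dicts(base, extra):
    """Return a new dict with `extra` recursively merged into `base`."""
    result = dict(base)
    for key, value in extra.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = merge_dicts(result[key], value)  # merge nested dicts
        else:
            result[key] = value  # values from `extra` win on conflicts
    return result

# Hypothetical usage:
# merge_dicts({"a": 1, "b": {"x": 1}}, {"b": {"y": 2}})
# -> {"a": 1, "b": {"x": 1, "y": 2}}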
def _merge_sort(a, left, right):
    if right - left < 2:
        return
    pivot = (right + left) // 2
    _merge_sort(a, left, pivot)
    _merge_sort(a, pivot, right)
    merge(a, left, pivot, right)
def select_group(self, group):
    if self.__group == group:
        return
    if group:
        groups = [group] + [g for g in self.groups if g != group]
    else:
        groups = self.groups

    # clear dict and only keep some values we want unchanged
    if not self.__base_dict:
        self.__base_dict = self.__dict__.copy()
    else:
        self.__dict__ = self.__base_dict.copy()

    # updating
    for group_ in groups:
        group_.select_group(None)
        if group_.handlers:
            merge(self.handlers, group_.handlers)
        self.__inherits(self.__dict__, group_.__dict__)

    # some values that we must reset to their original state
    for key in ('synctrex', 'group', 'groups', 'children'):
        if key in self.__base_dict:
            setattr(self, key, self.__base_dict[key])
        elif hasattr(self, key):
            delattr(self, key)

    self.__group = group
def mkdumpdb():
    dumpdb = {
        "name64": logindata["id64"],
        "name": logindata["dir"],
        "user": logindata["name"]
    }
    if "avatar" in logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]:
        dumpdb["avatar"] = logindata["configdata"]["UserLocalConfigStore"]["friends"][logindata["dir"]]["avatar"]
    dumpdb["gamelist"] = {}
    for db in dbs:
        dbo = copy.deepcopy(db)
        utils.merge(dumpdb["gamelist"], dbo)
    return dumpdb
def test(self):
    print('patch based testing')
    # load new images in a folder
    try:
        self.load(self.config.checkpoint_dir)
        print(" [*] Load SUCCESS")
    except:
        print(" [!] Load failed...")
        return

    print('new_data_folder', self.config.new_image_path)
    nxny_list, namelist = input_setup_test(self.sess, self.config)
    new_data_dir = os.path.join(self.config.checkpoint_dir, 'new.c' + str(self.config.c_dim) + '.h5')
    X_test, _ = read_data(new_data_dir)
    tst_data_loader = dataLoader(dataSize=X_test.shape[0],
                                 batchSize=self.config.test_batch_size,
                                 shuffle=False)
    tst_batch_count = int(math.ceil(X_test.shape[0] / self.config.test_batch_size))

    # run the network batch by batch over the patches
    result = list()
    start_time = time.time()
    for batch in range(tst_batch_count):
        inx = tst_data_loader.get_batch()
        X = X_test[inx].view()
        y_pred = self.pred.eval({self.images: X})
        result.append(y_pred)
    print("time: [%4.2f]" % (time.time() - start_time))

    # flatten the per-batch predictions back into a list of patches
    output = list()
    for i in result:
        for j in range(i.shape[0]):
            output.append(i[j])

    # stitch the patches of each image back together and save it
    patch_inx = 0
    for i in range(len(nxny_list)):
        nx, ny = nxny_list[i]
        img = merge(output[patch_inx:(patch_inx + nx * ny)], (nx, ny))
        patch_inx += nx * ny
        imsave(img, namelist[i].replace('.bmp', '.bmp.c' + str(self.config.c_dim)))
def predict(override_cfg, model_dir):
    """Run model over a dataset and dump predictions to json file."""
    assert FLAGS.predict_path
    cfg = _load_config(model_dir)
    cfg = utils.merge(cfg, override_cfg)
    input_fn = data.get_input_fn(
        split=cfg.dataset.eval_split,
        max_length=None,
        repeat=False,
        shuffle=False,
        cache=False,
        limit=None,
        data_path=cfg.dataset.data_path,
        vocab_path=cfg.dataset.vocab_path,
        is_tpu=False,
        use_generator=True,
        is_training=False)
    estimator = model.get_estimator(**cfg)
    predictions = dict()
    for i, prediction in enumerate(estimator.predict(input_fn)):
        predictions[prediction["id"]] = prediction["answer"]
        if i % 100 == 0:
            tf.logging.info("Prediction %s | %s: %s" % (i, prediction["id"], prediction["answer"]))

    # Dump results to a file
    with tf.gfile.GFile(FLAGS.predict_path, "w") as f:
        json.dump(predictions, f)
def get_base_dataset_simclr(self):
    base_dataset = torchvision.datasets.ImageFolder(self.train_path, transform=None)
    '''
    if self.oversampling:
        base_indices = oversampling_indices(np.array(list(range(len(base_dataset)))),
                                            np.array(base_dataset.targets))
    else:
        base_indices = np.array(list(range(len(base_dataset))))
    '''
    if self.merged and len(self.merge_classes) > 0:
        base_dataset = merge(base_dataset, self.merge_classes)
    if self.remove_classes and len(self.classes_to_remove) > 0:
        base_dataset = remove(base_dataset, self.classes_to_remove)

    base_indices = np.array(list(range(len(base_dataset))))
    base_dataset = WeaklySupervisedDataset(base_dataset, base_indices,
                                           transform=self.transform_simclr,
                                           mean=self.isic_mean, std=self.isic_std)
    return base_dataset
def document_info_for(filename, cache, options):
    mods_url = mods_for(filename)
    mods_cache = ""
    body = utils.download(mods_url, cache, utils.merge(options, {'xml': True}))
    doc = etree.fromstring(body)
    mods_ns = {"mods": "http://www.loc.gov/mods/v3"}
    locations = doc.xpath("//mods:location/mods:url", namespaces=mods_ns)

    urls = {}
    for location in locations:
        label = location.attrib['displayLabel']
        if "HTML" in label:
            format = "html"
        elif "PDF" in label:
            format = "pdf"
        elif "XML" in label:
            format = "xml"
        else:
            format = "unknown"
        urls[format] = location.text

    issued_on = doc.xpath("string(//mods:dateIssued)", namespaces=mods_ns)

    return issued_on, urls
def evaluate(override_cfg, model_dir, continuous=True):
    """Run training and evaluation."""
    tf.logging.info("model_dir = " + model_dir)
    try:
        cfg = _load_config(model_dir)
    except tf.errors.NotFoundError:
        tf.logging.info("Model directory does not exist yet. Creating new config.")
        cfg = model.build_config(model_dir=model_dir, data_path=FLAGS.data_path)
    tf.logging.info(cfg)
    tf.logging.info(override_cfg)
    cfg = utils.merge(cfg, override_cfg)

    cfg.tpu.enable = False
    cfg.dataset.max_length = None

    # Construct inputs and estimator
    _, eval_input = data.build_dataset(cfg.dataset, is_tpu=cfg.tpu.enable)
    estimator = model.get_estimator(**cfg)
    if continuous:
        checkpoints_iterator = contrib_training.checkpoints_iterator(cfg.model_dir)
        eval_metrics = None
        for ckpt_path in checkpoints_iterator:
            eval_metrics = estimator.evaluate(input_fn=eval_input, checkpoint_path=ckpt_path)
            tf.logging.info(pprint.pformat(eval_metrics))
        return eval_metrics
    else:
        eval_metrics = estimator.evaluate(input_fn=eval_input)
        return eval_metrics
def add(self, words):
    # logging.info("add {}".format(words))
    if len(words) in [UNIGRAM, BIGRAM, TRIGRAM]:
        self.counter.add(utils.merge(words))
    else:
        raise TypeError("Only support unigram, bigram, trigram")
def eval(model, name, sample_shape=[4, 4], load_all_ckpt=True):
    if name is None:
        name = model.name
    dir_name = 'eval/' + name
    if tf.gfile.Exists(dir_name):
        tf.gfile.DeleteRecursively(dir_name)
    tf.gfile.MakeDirs(dir_name)

    # training=False => generator only
    restorer = tf.train.Saver(slim.get_model_variables())

    config = tf.ConfigProto()
    best_gpu = utils.get_best_gpu()
    config.gpu_options.visible_device_list = str(best_gpu)  # Works same as CUDA_VISIBLE_DEVICES!
    with tf.Session(config=config) as sess:
        ckpts = get_all_checkpoints('./checkpoints/' + name, force=load_all_ckpt)
        size = sample_shape[0] * sample_shape[1]
        z_ = sample_z([size, model.z_dim])

        for v in ckpts:
            print("Evaluating {} ...".format(v))
            restorer.restore(sess, v)
            global_step = int(v.split('/')[-1].split('-')[-1])

            fake_samples = sess.run(model.fake_sample, {model.z: z_})

            # inverse transform: [-1, 1] => [0, 1]
            fake_samples = (fake_samples + 1.) / 2.
            merged_samples = utils.merge(fake_samples, size=sample_shape)
            fn = "{:0>5d}.png".format(global_step)
            scipy.misc.imsave(os.path.join(dir_name, fn), merged_samples)
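# Several of the image snippets in this listing call merge(images, size) to
# tile a batch of images into a single grid before saving. A minimal numpy
# sketch of such a helper, assuming images shaped [batch, h, w, c]; this is an
# illustrative assumption, not the exact helper those projects ship.
import numpy as np

def merge_grid(images, size):
    """Tile the first rows*cols images into one (rows*h, cols*w, c) array."""
    h, w = images.shape[1], images.shape[2]
    rows, cols = size
    grid = np.zeros((rows * h, cols * w, images.shape[3]), dtype=images.dtype)
    for idx, image in enumerate(images[:rows * cols]):
        r, c = idx // cols, idx % cols
        grid[r * h:(r + 1) * h, c * w:(c + 1) * w, :] = image
    return grid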
def mirror_package_zipfile(collection, package_name, file_path, lastmod, lastmod_cache, options):
    # Do we already have this file updated?
    if lastmod_cache.get("package") == lastmod:
        if not options.get("force", False):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(file_path) and options.get("cached", False):
        return

    # Download.
    file_url = GOVINFO_BASE_URL + "content/pkg/{}-{}.zip".format(collection, package_name)
    logging.warn("Downloading: " + file_path)
    data = utils.download(file_url, file_path, utils.merge(options, {
        'binary': True,
        'force': True,  # decision to cache was made above
        'to_cache': False,
        'needs_content': False,
    }))

    # Update the lastmod of the downloaded file.
    lastmod_cache['package'] = lastmod

    return True
def run_test(self):
    test_data, test_label = test_input_setup(self)

    print("Testing...")

    start_time = time.time()
    result = np.clip(self.pred.eval({self.images: test_data, self.labels: test_label, self.batch: 1}), 0, 1)
    passed = time.time() - start_time

    img1 = tf.convert_to_tensor(test_label, dtype=tf.float32)
    img2 = tf.convert_to_tensor(result, dtype=tf.float32)
    psnr = self.sess.run(tf.image.psnr(img1, img2, 1))
    ssim = self.sess.run(tf.image.ssim(img1, img2, 1))

    print("Took %.3f seconds, PSNR: %.6f, SSIM: %.6f" % (passed, psnr, ssim))

    result = merge(self, result)
    image_path = os.path.join(os.getcwd(), self.output_dir)
    image_path = os.path.join(image_path, "test_image.png")
    array_image_save(result, image_path)
def vote_ids_for_senate(congress, session_year, options):
    session_num = int(session_year) - utils.get_congress_first_year(int(congress)) + 1

    vote_ids = []

    page = utils.download(
        "http://www.senate.gov/legislative/LIS/roll_call_lists/vote_menu_%s_%d.xml" % (congress, session_num),
        "%s/votes/%s/pages/senate.xml" % (congress, session_year),
        utils.merge(options, {'binary': True})
    )

    if not page:
        logging.error("Couldn't download Senate vote XML index, aborting")
        return None

    dom = etree.fromstring(page)

    # Sanity checks.
    if int(congress) != int(dom.xpath("congress")[0].text):
        logging.error("Senate vote XML returns the wrong Congress: %s" % dom.xpath("congress")[0].text)
        return None
    if int(session_year) != int(dom.xpath("congress_year")[0].text):
        logging.error("Senate vote XML returns the wrong session: %s" % dom.xpath("congress_year")[0].text)
        return None

    # Get vote list.
    for vote in dom.xpath("//vote"):
        num = int(vote.xpath("vote_number")[0].text)
        vote_id = "s" + str(num) + "-" + str(congress) + "." + session_year
        if not should_process(vote_id, options):
            continue
        vote_ids.append(vote_id)

    return vote_ids
def narrow_docids(self, idx):
    m0 = [decode_array(idx[feat]) for feat in self.feats if idx.has_key(feat)]
    if not m0:
        return []
    refs = merge(m0)
    locs = [(refs[i], refs[i + 1]) for i in xrange(0, len(refs), 2)]
    return locs
def mirror_package(sitemap, package_name, lastmod, content_detail_url, options):
    """Create a local mirror of a FDSys package."""

    # Return a list of files we downloaded.
    results = []

    if not options.get("granules", False):
        # Most packages are just a package. This is the usual case.
        results = mirror_package_or_granule(sitemap, package_name, None, lastmod, options)

    else:
        # In some collections, like STATUTE, each document has subparts which are not
        # described in the sitemap. Load the main HTML page and scrape for the sub-files.
        # In the STATUTE collection, the MODS information in granules is redundant with
        # information in the top-level package MODS file. But the only way to get granule-
        # level PDFs is to go through the granules.
        content_index = utils.download(content_detail_url,
                                       "fdsys/package/%s/%s/%s.html" % (sitemap["year"], sitemap["collection"], package_name),
                                       utils.merge(options, {
                                           'binary': True,
                                       }))
        if not content_index:
            raise Exception("Failed to download %s" % content_detail_url)
        for link in html.fromstring(content_index).cssselect("table.page-details-data-table td.rightLinkCell a"):
            if link.text == "More":
                m = re.match("granule/(.*)/(.*)/content-detail.html", link.get("href"))
                if not m or m.group(1) != package_name:
                    raise Exception("Unmatched granule URL %s" % link.get("href"))
                granule_name = m.group(2)
                results = mirror_package_or_granule(sitemap, package_name, granule_name, lastmod, options)

    return results
def append_mode(self, w):
    trans = self.initial_translation()
    trans = merge(self._word_dict[w]["cell"].comment.content, trans)
    if verify(w, trans):
        self._word_dict[w]["cell"].comment = Comment(text=trans, author="Lee Mist")
        self.up_color_level(w)
def run(options):
    amdt_id = options.get('amendment_id', None)

    search_state = {}

    if amdt_id:
        amdt_type, number, congress = utils.split_bill_id(amdt_id)
        to_fetch = [amdt_id]
    else:
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)

        if not to_fetch:
            if options.get("fast", False):
                logging.warn("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    save_bill_search_state(saved_amendments, search_state)
def vote_ids_for_senate(congress, session_year, options):
    session_num = int(session_year) - utils.get_congress_first_year(int(congress)) + 1

    vote_ids = []

    url = "http://www.senate.gov/legislative/LIS/roll_call_lists/vote_menu_%s_%d.xml" % (congress, session_num)
    page = utils.download(
        url,
        "%s/votes/%s/pages/senate.xml" % (congress, session_year),
        utils.merge(options, {'binary': True})
    )

    if not page or "Requested Page Not Found (404)" in page:
        logging.error("Couldn't download Senate vote XML index %s, skipping" % url)
        return None

    dom = etree.fromstring(page)

    # Sanity checks.
    if int(congress) != int(dom.xpath("congress")[0].text):
        logging.error("Senate vote XML returns the wrong Congress: %s" % dom.xpath("congress")[0].text)
        return None
    if int(session_year) != int(dom.xpath("congress_year")[0].text):
        logging.error("Senate vote XML returns the wrong session: %s" % dom.xpath("congress_year")[0].text)
        return None

    # Get vote list.
    for vote in dom.xpath("//vote"):
        num = int(vote.xpath("vote_number")[0].text)
        vote_id = "s" + str(num) + "-" + str(congress) + "." + session_year
        if not should_process(vote_id, options):
            continue
        vote_ids.append(vote_id)

    return vote_ids
def merge_file(self):
    self.output = merge(self.files)
    if len(self.files) == 0:
        messagebox.showerror("Error Message", "You did not select a source file")
    if self.output.empty:
        messagebox.showerror("Error Message", "Column Values Are Different")
        return
    else:
        f = filedialog.asksaveasfilename(defaultextension='.xlsx',
                                         filetypes=[("Default Excel file", "*.xlsx"),
                                                    ("Excel file 97-2003", "*.xls")])
        if not f.rsplit("/", 1)[1].endswith(('.xlsx', '.xls')):
            messagebox.showerror("Error Message", "Only xlsx and xls File Types")
            return
        try:
            writer = pd.ExcelWriter(f, engine='xlsxwriter')
            self.output.to_excel(writer)
            writer.save()
            self.listbox_list_of_files.delete(0, END)
            messagebox.showinfo("", "File Merge Completed")
        except Exception as e:
            messagebox.showerror('Awww', 'Unknown Error Occurred.')
def save_image_to_memory(image):
    image = inverse_transform(image)
    image = merge(image, (1, 1))
    image = cv2.cvtColor(image.astype('uint8'), cv2.COLOR_RGB2BGR)
    is_success, buffer = cv2.imencode(".jpg", image)
    io_buf = io.BytesIO(buffer)
    return io_buf
def do_del(self, args):
    """Removes breakpoints. Usage is the same as 'b', but the selected breakpoints
    and breakpoint ranges are being deleted this time."""
    code_targets = []
    mem_targets = []
    global mem_breakpoints

    if not args:
        breakpoints.clear()
        mem_breakpoints.clear()
        apicall_handler.pending_breakpoints.clear()

    for arg in args.split(" "):
        if not arg:
            continue
        if arg == "stack":
            mem_targets += [(STACK_ADDR, STACK_ADDR + STACK_SIZE)]
        elif "m" == arg[0]:
            try:
                parts = list(map(lambda p: int(p, 0), arg[1:].split("-")))
                if len(parts) == 1:
                    lower = upper = parts[0]
                else:
                    lower = min(parts)
                    upper = max(parts)
                mem_targets += [(lower, upper)]
            except ValueError:
                print(f"Error parsing address or range {arg}")
        elif "$" == arg[0]:
            arg = arg[1:]
            if arg in apicall_handler.hooks.values():
                for addr, func_name in apicall_handler.hooks.items():
                    if arg == func_name:
                        code_targets += [addr]
                        break
            elif arg in apicall_handler.pending_breakpoints:
                apicall_handler.pending_breakpoints.remove(arg)
            else:
                print(f"Unknown method {arg}, not imported or used in pending breakpoint")
        else:
            try:
                code_targets += [int(arg, 0)]
            except ValueError:
                print(f"Error parsing address {arg}")

    with data_lock:
        for t in code_targets:
            try:
                breakpoints.remove(t)
            except KeyError:
                pass
        new_mem_breakpoints = []
        for b_lower, b_upper in mem_breakpoints:
            for t_lower, t_upper in mem_targets:
                new_mem_breakpoints += remove_range((b_lower, b_upper), (t_lower, t_upper))
        mem_breakpoints = list(merge(new_mem_breakpoints))

    self.print_breakpoints()
def merge_sort(arr):
    if len(arr) < 2:
        return arr  # already sorted
    mid_ix = len(arr) // 2
    left = merge_sort(arr[:mid_ix])
    right = merge_sort(arr[mid_ix:])
    return merge(left, right)
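# merge_sort above relies on a merge(left, right) helper that interleaves two
# already-sorted lists. A minimal sketch under that assumption (the original
# helper is not shown in this listing):
def merge(left, right):
    out = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            out.append(left[i])
            i += 1
        else:
            out.append(right[j])
            j += 1
    out.extend(left[i:])   # append whatever remains on either side
    out.extend(right[j:])
    return out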
def train(self, config):
    if config.is_train:
        input_setup(self.sess, config)
    else:
        nx, ny, arr = input_setup(self.sess, config)
        print(np.shape(arr))

    if config.is_train:
        data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "train.h5")
        train_data, train_label = read_data(data_dir, is_train=True)

    # Stochastic gradient descent with the standard backpropagation
    self.train_op = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self.loss)

    tf.global_variables_initializer().run()

    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if config.is_train:
        print("Training...")
        for ep in range(config.epoch):
            # Run by batch images
            batch_idxs = len(train_data) // config.batch_size
            for idx in range(0, batch_idxs):
                batch_images = train_data[idx * config.batch_size:(idx + 1) * config.batch_size]
                batch_labels = train_label[idx * config.batch_size:(idx + 1) * config.batch_size]

                counter += 1
                _, err = self.sess.run([self.train_op, self.loss],
                                       feed_dict={self.images: batch_images, self.labels: batch_labels})

                if counter % 10 == 0:
                    print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]"
                          % ((ep + 1), counter, time.time() - start_time, err))
                if counter % 500 == 0:
                    self.save(config.checkpoint_dir, counter)
    else:
        print("Testing...")
        for i in range(len(arr)):
            image = np.zeros((nx[i] * config.stride, ny[i] * config.stride, 3))
            for j in range(3):
                result = self.pred.eval({self.images: arr[i][:, :, :, j].reshape([nx[i] * ny[i], config.image_size, config.image_size, 1])})
                result = merge(result, [nx[i], ny[i]])
                result = result.squeeze()
                image[:, :, j] = result
            image_path = os.path.join(os.getcwd(), config.sample_dir)
            image_path = os.path.join(image_path, "test_output%03d.png" % i)
            imsave(image, image_path)
def predict(self, test_image):
    arrdata, arrCrCb, nx, ny = pre_setting(test_image)
    result = self.sess.run(self.conv3, feed_dict={self.images: arrdata})
    result = merge(result, arrCrCb, [nx, ny])
    return result
def get_sitemap(year, collection, lastmod, options):
    """Gets a single sitemap, downloading it if the sitemap has changed.

    Downloads the root sitemap (year==None, collection==None), or
    the sitemap for a year (collection==None), or the sitemap for
    a particular year and collection. Pass lastmod which is the current
    modification time of the file according to its parent sitemap, which
    is how it knows to return a cached copy.

    Returns the sitemap parsed into a DOM.
    """

    # Construct the URL and the path to where to cache the file on disk.
    if year == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
        path = "fdsys/sitemap/sitemap.xml"
    elif collection == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
        path = "fdsys/sitemap/%s/sitemap.xml" % year
    else:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
        path = "fdsys/sitemap/%s/%s.xml" % (year, collection)

    # Should we re-download the file?
    lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
    if options.get("cached", False):
        # If --cached is used, don't hit the network.
        force = False
    elif not lastmod:
        # No *current* lastmod date is known for this file (because it is the master
        # sitemap file, probably), so always download.
        force = True
    else:
        # If the file is out of date or --force is used, download the file.
        cache_lastmod = utils.read(lastmod_cache_file)
        force = (lastmod != cache_lastmod) or options.get("force", False)

    if force:
        logging.warn("Downloading: %s" % url)

    body = utils.download(url, path, utils.merge(options, {
        'force': force,
        'binary': True
    }))

    if not body:
        raise Exception("Failed to download %s" % url)

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the file.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)

    try:
        return etree.fromstring(body)
    except etree.XMLSyntaxError as e:
        raise Exception("XML syntax error in %s: %s" % (url, str(e)))
def save_imgs(self, epoch):
    r, c = 3, 3
    noise = np.random.normal(0, 1, (r * c, 100))
    gen_imgs = self.generator.predict(noise)

    # Rescale images 0 - 1
    gen_imgs = 0.5 * gen_imgs + 0.5

    ims('images/pokemon_%d.png' % epoch, utils.merge(gen_imgs, [3, 3]))
def train(self, config):
    if config.is_train:
        input_setup(self.sess, config)
    else:
        nx, ny = input_setup(self.sess, config)

    if config.is_train:
        data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "train.h5")
    else:
        data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "test.h5")

    train_data, train_label = read_data(data_dir)

    # Stochastic gradient descent with the standard backpropagation
    self.train_op = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self.loss)

    tf.initialize_all_variables().run()

    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if config.is_train:
        print("Training...")

        for ep in xrange(config.epoch):
            # Run by batch images
            batch_idxs = len(train_data) // config.batch_size
            for idx in xrange(0, batch_idxs):
                batch_images = train_data[idx * config.batch_size: (idx + 1) * config.batch_size]
                batch_labels = train_label[idx * config.batch_size: (idx + 1) * config.batch_size]

                counter += 1
                _, err = self.sess.run([self.train_op, self.loss],
                                       feed_dict={self.images: batch_images, self.labels: batch_labels})

                if counter % 10 == 0:
                    print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]"
                          % ((ep + 1), counter, time.time() - start_time, err))

                if counter % 500 == 0:
                    self.save(config.checkpoint_dir, counter)
    else:
        print("Testing...")

        result = self.pred.eval({self.images: train_data, self.labels: train_label})

        result = merge(result, [nx, ny])
        result = result.squeeze()
        image_path = os.path.join(os.getcwd(), config.sample_dir)
        image_path = os.path.join(image_path, "test_image.png")
        imsave(result, image_path)
def mirror_file(year, collection, package_name, lastmod, granule_name, file_types, options):
    # Where should we store the file?
    path = get_output_path(year, collection, package_name, granule_name, options)
    if not path:
        return  # should skip

    # Do we need to update this record?
    lastmod_cache_file = path + "/lastmod.txt"
    cache_lastmod = utils.read(lastmod_cache_file)
    force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name, path)
    for file_type in file_types:
        if file_type not in targets:
            raise Exception("Invalid file type: %s" % file_type)
        f_url, f_path = targets[file_type]

        if (not force) and os.path.exists(f_path):
            continue  # we already have the current file
        logging.warn("Downloading: " + f_path)
        data = utils.download(f_url, f_path, utils.merge(options, {
            'xml': True,
            'force': force,
            'to_cache': False,
            'needs_content': file_type == "text" and f_path.endswith(".html"),
        }))

        if not data:
            if file_type == "pdf":
                # expected to be present for all packages
                raise Exception("Failed to download %s" % package_name)
            else:
                # not all packages have all file types, but assume this is OK
                logging.error("file not found: " + f_url)

        if file_type == "text" and f_path.endswith(".html"):
            # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
            # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
            # html.fromstring does auto-detection.
            with open(f_path[0:-4] + "txt", "w") as f:
                text_content = unicode(html.fromstring(data).text_content())
                f.write(text_content.encode("utf8"))

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the files for this sitemap item.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)
def fetch_vote(vote_id, options):
    logging.info("\n[%s] Fetching..." % vote_id)

    vote_chamber, vote_number, vote_congress, vote_session_year = utils.split_vote_id(vote_id)

    if vote_chamber == "h":
        url = "http://clerk.house.gov/evs/%s/roll%03d.xml" % (vote_session_year, int(vote_number))
    else:
        session_num = int(vote_session_year) - utils.get_congress_first_year(int(vote_congress)) + 1
        url = "http://www.senate.gov/legislative/LIS/roll_call_votes/vote%d%d/vote_%d_%d_%05d.xml" % (int(vote_congress), session_num, int(vote_congress), session_num, int(vote_number))

    # fetch vote XML page
    body = utils.download(
        url,
        "%s/votes/%s/%s%s/%s%s.xml" % (vote_congress, vote_session_year, vote_chamber, vote_number, vote_chamber, vote_number),
        utils.merge(options, {'binary': True}),
    )

    if not body:
        return {'saved': False, 'ok': False, 'reason': "failed to download"}

    if options.get("download_only", False):
        return {'saved': False, 'ok': True, 'reason': "requested download only"}

    if "This vote was vacated" in body:
        # Vacated votes: 2011-484, 2012-327, ...
        # Remove file, since it may previously have existed with data.
        for f in (output_for_vote(vote_id, "json"), output_for_vote(vote_id, "xml")):
            if os.path.exists(f):
                os.unlink(f)
        return {'saved': False, 'ok': True, 'reason': "vote was vacated"}

    dom = etree.fromstring(body)

    vote = {
        'vote_id': vote_id,
        'chamber': vote_chamber,
        'congress': int(vote_congress),
        'session': vote_session_year,
        'number': int(vote_number),
        'updated_at': datetime.datetime.fromtimestamp(time.time()),
        'source_url': url,
    }

    # do the heavy lifting
    if vote_chamber == "h":
        parse_house_vote(dom, vote)
    elif vote_chamber == "s":
        parse_senate_vote(dom, vote)

    # output and return
    output_vote(vote, options)

    return {'ok': True, 'saved': True}
def train(self, config):
    err_li = []
    # NOTE: if train, the nx, ny are ignored
    nx, ny = input_setup(config)

    data_dir = checkpoint_dir(config)

    input_, label_ = read_data(data_dir)

    # Stochastic gradient descent with the standard backpropagation
    #self.train_op = tf.train.GradientDescentOptimizer(config.learning_rate).minimize(self.loss)
    self.train_op = tf.train.AdamOptimizer(learning_rate=config.learning_rate).minimize(self.loss)  # minimize over w, b
    tf.initialize_all_variables().run()  # start running the session

    counter = 0
    time_ = time.time()

    self.load(config.checkpoint_dir)
    # Train
    if config.is_train:
        print("Now Start Training...")
        for ep in range(config.epoch):  # total number of epochs to run
            # Run by batch images
            batch_idxs = len(input_) // config.batch_size
            for idx in range(0, batch_idxs):  # one batch of batch_size samples per step
                batch_images = input_[idx * config.batch_size:(idx + 1) * config.batch_size]
                batch_labels = label_[idx * config.batch_size:(idx + 1) * config.batch_size]
                counter += 1
                # feed_dict feeds into self.images and self.labels of the built model
                _, err = self.sess.run([self.train_op, self.loss],
                                       feed_dict={self.images: batch_images, self.labels: batch_labels})
                err_li.append(err)
                if counter % 10 == 0:
                    print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]"
                          % ((ep + 1), counter, time.time() - time_, err))
                    #print(label_[1] - self.pred.eval({self.images: input_})[1], 'loss:]', err)
                if counter % 500 == 0:
                    self.save(config.checkpoint_dir, counter)
    # Test
    else:
        print("Now Start Testing...")
        #print("nx", "ny", nx, ny)
        result = self.pred.eval({self.images: input_})
        #print(label_[1] - result[1])
        image = merge(result, [nx, ny], self.c_dim)
        #checkimage(image)
        imsave(image, config.result_dir + '/result.png', config)
def get_train_test(test_start, test_stop, model='ar'):
    """Loads train and test data to datasets ... """
    #logger.info('Retrieves data')
    if model == 'ar':
        files_train = get_list_of_files_excluding_period(test_start, test_stop)
        files_test = get_list_of_files(test_start, test_stop)
    else:
        files_train = get_list_of_files_excluding_period_traditional_model(test_start, test_stop)
        files_test = get_list_of_files_traditional_model(test_start, test_stop)
    #logger.info('Detected the relevant files.')
    train_dataset = merge(files_train)
    #logger.info('Merged training data for {} to {}'.format(test_start, test_stop))
    test_dataset = merge(files_test)
    #logger.info('Merged test data for {} to {}'.format(test_start, test_stop))
    return train_dataset, test_dataset
def save_samples(sess, val_z, model, dir_name, global_step, shape):
    """Function to save samples during training."""
    fake_samples = sess.run(model.G, {model.z: val_z})
    fake_samples = 255 * ((fake_samples + 1.) / 2.)
    merged_samples = utils.merge(fake_samples, size=shape)
    fn = "{:0>6d}.png".format(global_step)
    scipy.misc.imsave(os.path.join(dir_name, fn), merged_samples)
def _add(self, ngram):
    # print ngram
    if len(ngram) == 1:
        self.unigram_vocab.add(ngram[0])
        self.unigram_counter.add(ngram)
    if len(ngram) == 2:
        self.bigram_vocab.add(utils.merge(ngram))
        self.bigram_counter.add(ngram)
    if len(ngram) == 3:
        self.trigram_vocab.add(utils.merge(ngram))
        self.trigram_counter.add(ngram)
    if len(ngram) > 2:
        key = ' '.join(ngram[:-1])
        value = ngram[-1]
        try:
            self.adj_words[key].add(value)
        except:
            self.adj_words[key] = set()
            self.adj_words[key].add(value)
def mirror_bulkdata_file(sitemap, url, item_path, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file?
    path = "%s/fdsys/%s/%s" % (utils.data_dir(), sitemap["collection"], item_path)

    # For BILLSTATUS, store this along with where we store the rest of bill
    # status data.
    if sitemap["collection"] == "BILLSTATUS":
        from bills import output_for_bill
        bill_id, version_code = get_bill_id_for_package(os.path.splitext(os.path.basename(item_path))[0], with_version=False)
        path = output_for_bill(bill_id, FDSYS_BILLSTATUS_FILENAME, is_data_dot=False)

    # Where should we store the lastmod found in the sitemap so that
    # we can tell later if the file has changed?
    lastmod_cache_file = os.path.splitext(path)[0] + "-lastmod.txt"

    # Do we already have this file up to date?
    if os.path.exists(lastmod_cache_file) and not options.get("force", False):
        if lastmod == utils.read(lastmod_cache_file):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(path) and options.get("cached", False):
        return

    # Download.
    logging.warn("Downloading: " + path)
    data = utils.download(url, path, utils.merge(options, {
        'binary': True,
        'force': True,  # decision to cache was made above
        'to_cache': False,
    }))
    results.append(path)

    if not data:
        # Something failed.
        return

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the file again.
    utils.write(lastmod, lastmod_cache_file)

    return results
def train(args):
    if args.is_train:
        input_setup(args)
    else:
        nx, ny = input_setup(args)

    counter = 0
    start_time = time.time()

    if args.is_train:
        print("Training...")
        data_dir = os.path.join('./{}'.format(args.checkpoint_dir), "train.h5")
        train_data, train_label = read_data(data_dir)
        display_step = 5
        for step in range(args.epochs):
            batch_idxs = len(train_data) // args.batch_size
            for idx in range(0, batch_idxs):
                batch_images = train_data[idx * args.batch_size:(idx + 1) * args.batch_size]
                batch_labels = train_label[idx * args.batch_size:(idx + 1) * args.batch_size]
                run_optimization(batch_images, batch_labels)

            if step % display_step == 0:
                pred = srcnn(batch_images)
                loss = mse(pred, batch_labels)
                #psnr_loss = psnr(batch_labels, pred)
                print("step: %i, loss: %f" % (step, loss))
    else:
        print("Testing...")
        data_dir = os.path.join('./{}'.format(args.checkpoint_dir), "test.h5")
        test_data, test_label = read_data(data_dir)
        result = srcnn(test_data)

        result = merge(result, [nx, ny])
        result = result.squeeze()
        image_path = os.path.join(os.getcwd(), args.sample_dir)
        image_path = os.path.join(image_path, "test_image.png")
        print(result.shape)
        imsave(result, image_path)
def get_image_files(attrs):
    images = []
    directory = os.path.join(config.get('crawled', 'image_dir'), attrs)
    for dirname, subdirs, filenames in tf.gfile.Walk(directory):
        if len(subdirs) == 0:
            files = filter(lambda filename: 'jpg' in filename, filenames)
            filepaths = map(lambda file: os.path.join(dirname, file), files)
            images.append(filepaths)
    images = merge(images)
    print 'There are {} items in {}'.format(len(images), directory)
    return images
def narrow_docids(self, idx):
    m0 = [decode_array(idx[w]) for w in self.r0 if idx.has_key(w)]
    if self.r0 and not m0:
        return []
    m2 = [decode_array(idx[w]) for w in self.r2 if idx.has_key(w)]
    if self.r2 and not m2:
        return []
    if self.r1:
        try:
            refs = intersect(decode_array(idx[w]) for w in self.r1)
        except KeyError:
            return []
        refs = union(refs, [m for m in (m0, m2) if m])
    elif not self.r2:
        refs = merge(m0)
    else:
        refs = union(merge(m0), [m2])
    # Now: refs = [ docid1,sentid1, docid2,sentid2, ... ]
    locs = [(refs[i], refs[i + 1]) for i in xrange(0, len(refs), 2)]
    return locs
def mirror_file(year, collection, package_name, lastmod, granule_name, file_types, options):
    # Where should we store the file?
    path = get_output_path(year, collection, package_name, granule_name, options)
    if not path:
        return  # should skip

    # Do we need to update this record?
    lastmod_cache_file = path + "/lastmod.txt"
    cache_lastmod = utils.read(lastmod_cache_file)
    force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name, path)
    updated_file_types = set()
    for file_type in file_types:
        if file_type not in targets:
            raise Exception("Invalid file type: %s" % file_type)
        f_url, f_path = targets[file_type]

        if (not force) and os.path.exists(f_path):
            continue  # we already have the current file
        logging.warn("Downloading: " + f_path)
        data = utils.download(f_url, f_path, utils.merge(options, {
            'binary': True,
            'force': force,
            'to_cache': False,
            'needs_content': file_type == "text" and f_path.endswith(".html"),
        }))
        updated_file_types.add(file_type)

        if not data:
            if file_type == "pdf":
                # expected to be present for all packages
                raise Exception("Failed to download %s" % package_name)
            else:
                # not all packages have all file types, but assume this is OK
                logging.error("file not found: " + f_url)
            continue

        if file_type == "text" and f_path.endswith(".html"):
            # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
            # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
            # html.fromstring does auto-detection.
            with open(f_path[0:-4] + "txt", "w") as f:
                text_content = unicode(html.fromstring(data).text_content())
                f.write(text_content.encode("utf8"))

    if collection == "BILLS" and "mods" in updated_file_types:
        # When we download bill files, also create the text-versions/data.json file
        # which extracts commonly used components of the MODS XML.
        from bill_versions import write_bill_version_metadata
        write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the files for this sitemap item.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)
def generate_validation_batch(required_input_keys, required_output_keys, set="validation"):
    # generate sunny data
    sunny_length = get_lenght_of_set(name="sunny", set=set)
    regular_length = get_lenght_of_set(name="regular", set=set)
    sunny_batches = int(np.ceil(sunny_length / float(_config().sunny_batch_size)))
    regular_batches = int(np.ceil(regular_length / float(_config().batch_size)))

    if "sunny" in required_input_keys or "segmentation" in required_output_keys:
        num_batches = max(sunny_batches, regular_batches)
    else:
        num_batches = regular_batches

    num_chunks = int(np.ceil(num_batches / float(_config().batches_per_chunk)))

    sunny_chunk_size = _config().batches_per_chunk * _config().sunny_batch_size
    regular_chunk_size = _config().batches_per_chunk * _config().batch_size

    for n in xrange(num_chunks):
        result = {}
        input_keys_to_do = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys)  # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = range(n * sunny_chunk_size, (n + 1) * sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = range(n * regular_chunk_size, (n + 1) * regular_chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set=set,
                                       preprocess_function=_config().preprocess_validation)

        result = utils.merge(result, kaggle_data)

        yield result
def generate_train_batch(required_input_keys, required_output_keys):
    """Creates an iterator that returns train batches."""

    sunny_chunk_size = _config().sunny_batch_size * _config().batches_per_chunk
    chunk_size = _config().batch_size * _config().batches_per_chunk

    while True:
        result = {}
        input_keys_to_do = list(required_input_keys)  # clone
        output_keys_to_do = list(required_output_keys)  # clone

        if "sunny" in input_keys_to_do or "segmentation" in output_keys_to_do:
            indices = _config().rng.randint(0, len(sunny_train_images), sunny_chunk_size)
            sunny_patient_data = get_sunny_patient_data(indices, set="train")
            result = utils.merge(result, sunny_patient_data)
            input_keys_to_do.remove("sunny")
            output_keys_to_do.remove("segmentation")

        indices = _config().rng.randint(0, len(train_patient_folders), chunk_size)
        kaggle_data = get_patient_data(indices, input_keys_to_do, output_keys_to_do, set="train",
                                       preprocess_function=_config().preprocess_train)

        result = utils.merge(result, kaggle_data)

        yield result
def fetch_version(bill_version_id, options):
    # Download MODS etc.
    logging.info("\n[%s] Fetching..." % bill_version_id)

    bill_type, number, congress, version_code = utils.split_bill_version_id(bill_version_id)
    # bill_id = "%s%s-%s" % (bill_type, number, congress)

    utils.download(
        mods_url_for(bill_version_id),
        document_filename_for(bill_version_id, "mods.xml"),
        utils.merge(options, {'binary': True, 'to_cache': False})
    )

    return write_bill_version_metadata(bill_version_id)
def run(options):
    # Download the TSV file.
    cache_zip_path = "adler-wilkerson-bills.zip"
    utils.download(
        "http://congressionalbills.org/billfiles/bills80-92.zip",
        cache_zip_path,
        utils.merge(options, {'binary': True, 'needs_content': False}))

    # Unzip in memory and process the records.
    zfile = zipfile.ZipFile(utils.cache_dir() + "/" + cache_zip_path)
    csvreader = csv.DictReader(zfile.open("bills80-92.txt"), delimiter="\t")
    for record in csvreader:
        rec = process_bill(record)

        import pprint
        pprint.pprint(rec)
def run(options):
    amendment_id = options.get('amendment_id', None)
    bill_id = options.get('bill_id', None)

    search_state = {}

    if amendment_id:
        amendment_type, number, congress = utils.split_bill_id(amendment_id)
        to_fetch = [amendment_id]

    elif bill_id:
        # first, crawl the bill
        bill_type, number, congress = utils.split_bill_id(bill_id)
        bill_status = fetch_bill(bill_id, options)
        if bill_status['ok']:
            bill = json.loads(utils.read(output_for_bill(bill_id, "json")))
            to_fetch = [x["amendment_id"] for x in bill["amendments"]]
        else:
            logging.error("Couldn't download information for that bill.")
            return None

    else:
        congress = options.get('congress', utils.current_congress())
        to_fetch = bill_ids_for(congress, utils.merge(options, {'amendments': True}), bill_states=search_state)

        if not to_fetch:
            if options.get("fast", False):
                logging.warn("No amendments changed.")
            else:
                logging.error("Error figuring out which amendments to download, aborting.")
            return None

        limit = options.get('limit', None)
        if limit:
            to_fetch = to_fetch[:int(limit)]

    if options.get('pages_only', False):
        return None

    logging.warn("Going to fetch %i amendments from congress #%s" % (len(to_fetch), congress))
    saved_amendments = utils.process_set(to_fetch, fetch_amendment, options)

    # keep record of the last state of all these amendments, for later fast-searching
    save_bill_search_state(saved_amendments, search_state)
def mirror_bulkdata_file(collection, url, item_path, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file?
    path = "%s/govinfo/%s/%s" % (utils.data_dir(), collection, item_path)

    # For BILLSTATUS, store this along with where we store the rest of bill
    # status data.
    if collection == "BILLSTATUS":
        from bills import output_for_bill
        bill_id, version_code = get_bill_id_for_package(
            os.path.splitext(os.path.basename(item_path.replace("BILLSTATUS-", "")))[0],
            with_version=False)
        path = output_for_bill(bill_id, FDSYS_BILLSTATUS_FILENAME, is_data_dot=False)

    # Where should we store the lastmod found in the sitemap so that
    # we can tell later if the file has changed?
    lastmod_cache_file = os.path.splitext(path)[0] + "-lastmod.txt"

    # Do we already have this file up to date?
    if os.path.exists(lastmod_cache_file) and not options.get("force", False):
        if lastmod == utils.read(lastmod_cache_file):
            return

    # With --cached, skip if the file is already downloaded.
    if os.path.exists(path) and options.get("cached", False):
        return

    # Download.
    logging.warn("Downloading: " + path)
    data = utils.download(url, path, utils.merge(options, {
        'binary': True,
        'force': True,  # decision to cache was made above
        'to_cache': False,
    }))
    results.append(path)

    if not data:
        # Something failed.
        return

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the file again.
    utils.write(lastmod, lastmod_cache_file)

    return results
def start(self):
    """Start up the bot process

    Calls the ``connect`` method and then (if ``stream`` is set) begins
    the event loop.
    """
    login_data = self.connect()
    if not login_data:
        return None
    self.running = True

    for handler in self._hooks[events.SETUP]:
        handler(merge(login_data, {"client": self.client}))

    if self.stream:
        try:
            self.read()
        except:
            self.stop()
            raise
def test(self, name="test", options=None, fixed=False):
    if options is None:
        options = self.options
    t = strfnow()
    for option in options:
        if fixed == True:
            a, b, c, d = self.loader.tests[option]
        else:
            a, b, c, d = self.loader.next(set_option=option)
        feed = {self.a: a, self.b: b, self.c: c, self.d: d}
        fname = "%s/%s_option:%s_time:%s.png" % (self.sample_dir, name, option, t)
        g_img, g2_img, g3_img = self.sess.run([self.g1_img, self.g2_img, self.g3_img], feed_dict=feed)
        imsave(fname, merge(a, b, c, d, g_img, g2_img, g3_img))
def parse_data(url):
    r = requests.get(url, headers=utils.merge(DEFAULT_HEADERS, {}))
    soup = BeautifulSoup(r.text, "html.parser")
    if r.status_code != 200:
        return None

    full_data = {}
    for t in soup.select('table'):
        section = t.select('th')[0].contents[0]
        h = [get_contents(e.contents) for e in t.select('.ttl > a')]
        c = [get_contents(e.contents) for e in t.select('.nfo')]
        full_data[section] = dict(zip(h, c))

    new_data = {}
    for key, val in full_data.items():
        for subk, subv in val.items():
            new_data["%s:%s" % (key, subk)] = subv
            #print json.dumps({"%s:%s" % (key, subk): subv})

    return new_data
def get_sitemap(year, collection, lastmod, options):
    # Construct the URL and the path to where to cache the file on disk.
    if year == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap.xml"
        path = "fdsys/sitemap/sitemap.xml"
    elif collection == None:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/sitemap_%s.xml" % (year, year)
        path = "fdsys/sitemap/%s/sitemap.xml" % year
    else:
        url = "http://www.gpo.gov/smap/fdsys/sitemap_%s/%s_%s_sitemap.xml" % (year, year, collection)
        path = "fdsys/sitemap/%s/%s.xml" % (year, collection)

    # Should we re-download the file?
    lastmod_cache_file = utils.cache_dir() + "/" + path.replace(".xml", "-lastmod.txt")
    if options.get("cached", False):
        # If --cached is used, don't hit the network.
        force = False
    elif not lastmod:
        # No *current* lastmod date is known for this file (because it is the master
        # sitemap file, probably), so always download.
        force = True
    else:
        # If the file is out of date or --force is used, download the file.
        cache_lastmod = utils.read(lastmod_cache_file)
        force = (lastmod != cache_lastmod) or options.get("force", False)

    if force:
        logging.warn("Downloading: %s" % url)

    body = utils.download(url, path, utils.merge(options, {"force": force, "xml": True}))
    if not body:
        raise Exception("Failed to download %s" % url)

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the file.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)

    return etree.fromstring(body)
def update_sitemap2(url, current_lastmod, how_we_got_here, options, lastmod_cache, cache_file):
    # Return a list of files we downloaded.
    results = []

    # Download anew if the current_lastmod doesn't match the stored lastmod
    # in our cache, and if --cache is not specified. Or if --force is given.
    # If we're not downloading it, load it from disk because we still have
    # to process each sitemap to ensure we've downloaded all of the package
    # files the user wants.
    download = should_download_sitemap(lastmod_cache.get("lastmod"), current_lastmod, options)

    # Download, or just retrieve from cache.
    if download:
        logging.warn("Downloading: %s" % url)
    body = utils.download(url, cache_file, utils.merge(options, {
        'force': download,
        'binary': True
    }))
    if not body:
        raise Exception("Failed to download %s" % url)

    # If we downloaded a new file, update the lastmod for our cache.
    if download and current_lastmod:
        lastmod_cache["lastmod"] = current_lastmod

    # Load the XML.
    try:
        sitemap = etree.fromstring(body)
    except etree.XMLSyntaxError as e:
        raise Exception("XML syntax error in %s: %s" % (url, str(e)))

    # Process the entries.
    if sitemap.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}sitemapindex":
        # This is a sitemap index. Process the sitemaps listed in this
        # sitemapindex recursively.
        for node in sitemap.xpath("x:sitemap", namespaces=ns):
            # Get URL and lastmod date of the sitemap.
            url = str(node.xpath("string(x:loc)", namespaces=ns))
            lastmod = str(node.xpath("string(x:lastmod)", namespaces=ns))
            sitemap_results = update_sitemap(url, lastmod, how_we_got_here, options)
            if sitemap_results is not None:
                results = results + sitemap_results

    elif sitemap.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset":
        # This is a regular sitemap with content items listed.

        # Process the items.
        for node in sitemap.xpath("x:url", namespaces=ns):
            url = str(node.xpath("string(x:loc)", namespaces=ns))
            lastmod = str(node.xpath("string(x:lastmod)", namespaces=ns))

            m = re.match(COLLECTION_BASE_URL + r"([^-]+)-(.*)", url)
            if m:
                collection = m.group(1)
                package_name = m.group(2)
                if options.get("filter") and not re.search(options["filter"], package_name):
                    continue
                try:
                    mirror_results = mirror_package(collection, package_name, lastmod, lastmod_cache.setdefault("packages", {}), options)
                except:
                    logging.exception("Error fetching package {} in collection {} from {}.".format(package_name, collection, url))
                    mirror_results = []
                results.extend(mirror_results)
            else:
                # This is a bulk data item. Extract components of the URL.
                m = re.match(BULKDATA_BASE_URL + r"([^/]+)/(.*)", url)
                if not m:
                    raise Exception("Unmatched bulk data file URL (%s) at %s." % (url, "->".join(how_we_got_here)))
                collection = m.group(1)
                item_path = m.group(2)
                if options.get("filter") and not re.search(options["filter"], item_path):
                    continue
                try:
                    mirror_results = mirror_bulkdata_file(collection, url, item_path, lastmod, options)
                except:
                    logging.exception("Error fetching file {} in collection {} from {}.".format(item_path, collection, url))
                    mirror_results = None
                if mirror_results is not None and len(mirror_results) > 0:
                    results = results + mirror_results

    else:
        raise Exception("Unknown sitemap type (%s) at the root sitemap of %s." % (sitemap.tag, url))

    return results
def mirror_package_or_granule(sitemap, package_name, granule_name, lastmod, options):
    # Return a list of files we downloaded.
    results = []

    # Where should we store the file? Each collection has a different
    # file system layout (for BILLS, we put bill text along where the
    # bills scraper puts bills).
    path = get_output_path(sitemap, package_name, granule_name, options)
    if not path:
        return  # should skip

    # Get the lastmod times of the files previously saved for this package.
    file_lastmod_changed = False
    file_lastmod = {}
    lastmod_cache_file = path + "/lastmod.json"
    if os.path.exists(lastmod_cache_file):
        file_lastmod = json.load(open(lastmod_cache_file))

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name)
    for file_type, (file_url, relpath) in targets.items():
        # Does the user want to save this file type? If the user didn't
        # specify --store, save everything. Otherwise only save the
        # file types asked for.
        if options.get("store", "") and file_type not in options["store"].split(","):
            continue

        # Do we already have this file updated? The file_lastmod JSON
        # stores the lastmod from the sitemap at the time we downloaded
        # the individual file.
        if file_lastmod.get(file_type) == lastmod:
            if not options.get("force", False):
                continue

        # With --cached, skip if the file is already downloaded.
        file_path = os.path.join(path, relpath)
        if os.path.exists(file_path) and options.get("cached", False):
            continue

        # Download.
        logging.warn("Downloading: " + file_path)
        data = utils.download(file_url, file_path, utils.merge(options, {
            'binary': True,
            'force': True,  # decision to cache was made above
            'to_cache': False,
            'return_status_code_on_error': True,
            'needs_content': (file_type == "text" and file_path.endswith(".html")),
        }))
        results.append(file_path)

        # Download failed?
        if data == 404:
            # Not all packages have all file types. Just check the ones we know
            # must be there.
            if file_type in ("pdf", "zip"):
                # expected to be present for all packages
                raise Exception("Failed to download %s %s (404)" % (package_name, file_type))
            elif sitemap["collection"] == "BILLS" and file_type in ("text", "mods"):
                # expected to be present for bills
                raise Exception("Failed to download %s %s (404)" % (package_name, file_type))
        elif data is True:
            # Download was successful but needs_content was False so we don't have the
            # file content. Instead, True is returned. Strangely isinstance(True, int) is
            # True (!!!) so we have to test for True separately from testing if we got a
            # return code integer.
            pass
        elif not data or isinstance(data, int):
            # There was some other error - skip the rest. Don't
            # update file_lastmod!
            continue

        # Update the lastmod of the downloaded file. If the download failed
        # because of a 404, we still update this to indicate that the file
        # definitively does not exist. We won't try fetching it again.
        file_lastmod[file_type] = lastmod
        file_lastmod_changed = True

        # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
        # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
        # html.fromstring does auto-detection.
        if file_type == "text" and file_path.endswith(".html"):
            file_path_text = file_path[0:-4] + "txt"
            logging.info("Unwrapping HTML to: " + file_path_text)
            with open(file_path_text, "w") as f:
                f.write(unwrap_text_in_html(data))

        if sitemap["collection"] == "BILLS" and file_type == "mods":
            # When we download bill files, also create the text-versions/data.json file
            # which extracts commonly used components of the MODS XML, whenever we update
            # that MODS file.
            extract_bill_version_metadata(package_name, path)

    # Write the current last modified date back to disk so we know the next time whether
    # we need to fetch the files for this sitemap item. Assuming we fetched anything.
    # If nothing new was fetched, then there is no reason to update the file.
    if file_lastmod and file_lastmod_changed:
        utils.write(json.dumps(file_lastmod), lastmod_cache_file)

    return results
def mirror_package(year, collection, package_name, lastmod, granule_name, file_types, options):
    # Where should we store the file?
    path = get_output_path(year, collection, package_name, granule_name, options)
    if not path:
        return  # should skip

    # Do we need to update this record?
    lastmod_cache_file = path + "/lastmod.txt"
    cache_lastmod = utils.read(lastmod_cache_file)
    force = ((lastmod != cache_lastmod) or options.get("force", False)) and not options.get("cached", False)

    # Try downloading files for each file type.
    targets = get_package_files(package_name, granule_name, path)
    updated_file_types = set()
    for file_type in file_types:
        if file_type not in targets:
            raise Exception("Invalid file type: %s" % file_type)

        # For BILLS, XML was not available until the 108th Congress, though even after that
        # it was spotty until the 111th or so Congress.
        if file_type == "xml" and collection == "BILLS" and int(package_name[6:9]) < 108:
            continue

        f_url, f_path = targets[file_type]

        if (not force) and os.path.exists(f_path):
            continue  # we already have the current file
        logging.warn("Downloading: " + f_path)
        data = utils.download(f_url, f_path, utils.merge(options, {
            'binary': True,
            'force': force,
            'to_cache': False,
            'needs_content': file_type == "text" and f_path.endswith(".html"),
        }))
        updated_file_types.add(file_type)

        if not data:
            if file_type in ("pdf", "zip"):
                # expected to be present for all packages
                raise Exception("Failed to download %s" % package_name)
            else:
                # not all packages have all file types, but assume this is OK
                logging.error("file not found: " + f_url)
            continue

        if file_type == "text" and f_path.endswith(".html"):
            # The "text" format files are put in an HTML container. Unwrap it into a .txt file.
            # TODO: Encoding? The HTTP content-type header says UTF-8, but do we trust it?
            # html.fromstring does auto-detection.
            with open(f_path[0:-4] + "txt", "w") as f:
                f.write(unwrap_text_in_html(data))

        if file_type == "zip":
            # This is the entire package in a ZIP file. Extract the contents of this file
            # to the appropriate paths.
            with zipfile.ZipFile(f_path) as zf:
                for z2 in zf.namelist():
                    if not z2.startswith(package_name + "/"):
                        raise ValueError("Unmatched file name in package ZIP: " + z2)
                    z2 = z2[len(package_name) + 1:]  # strip off leading package name

                    if z2 in ("mods.xml", "premis.xml", "dip.xml"):
                        # Extract this file to a file of the same name.
                        z3 = path + "/" + z2
                    elif z2 == "pdf/" + package_name + ".pdf":
                        # Extract this file to "document.pdf".
                        z3 = path + "/document.pdf"
                    elif z2 == "html/" + package_name + ".htm":
                        # Extract this file and unwrap text to "document.txt".
                        z3 = path + "/document.txt"
                    else:
                        raise ValueError("Unmatched file name in package ZIP: " + z2)

                    with zf.open(package_name + "/" + z2) as zff:
                        with open(z3, "w") as output_file:
                            data = zff.read()
                            if z3 == path + "/document.txt":
                                data = unwrap_text_in_html(data)
                            output_file.write(data)

    if collection == "BILLS" and "mods" in updated_file_types:
        # When we download bill files, also create the text-versions/data.json file
        # which extracts commonly used components of the MODS XML.
        from bill_versions import write_bill_version_metadata
        write_bill_version_metadata(get_bill_id_for_package(package_name, with_version=True))

    # Write the current last modified date to disk so we know the next time whether
    # we need to fetch the files for this sitemap item.
    if lastmod and not options.get("cached", False):
        utils.write(lastmod, lastmod_cache_file)
def mirror_packages(fetch_collections, options): """Create a local mirror of FDSys document files. Only downloads changed files, according to the sitemap. Run update_sitemap_cache first. Pass fetch_collections as None, or as a set of collection names to restrict the update to particular FDSys collections. Set options["store"] to a comma-separated list of file types (pdf, mods, text, xml, zip). """ # For determining whether we need to process a sitemap file again on a later # run, we need to make a key out of the command line arguments that affect # which files we are downloading. cache_options_key = repr(tuple(sorted(kv for kv in options.items() if kv[0] in ("store", "year", "congress", "granules", "cached")))) file_types = options["store"].split(",") # Process each FDSys sitemap... for sitemap in sorted(glob.glob(utils.cache_dir() + "/fdsys/sitemap/*/*.xml")): # Should we process this file? year, collection = re.search(r"/(\d+)/([^/]+).xml$", sitemap).groups() if "year" in options and year != options["year"]: continue if "congress" in options and int(year) not in utils.get_congress_years(int(options["congress"])): continue if fetch_collections and collection not in fetch_collections: continue # Has this sitemap changed since the last successful mirror? # # The sitemap's last modification time is stored in ...-lastmod.txt, # which comes from the sitemap's parent sitemap's lastmod listing for # the file. # # Compare that to the lastmod value of when we last did a successful mirror. # This function can be run to fetch different sets of files, so get the # lastmod value corresponding to the current run arguments. sitemap_store_state_file = re.sub(r"\.xml$", "-store-state.json", sitemap) sitemap_last_mod = open(re.sub(r"\.xml$", "-lastmod.txt", sitemap)).read() if os.path.exists(sitemap_store_state_file): sitemap_store_state = json.load(open(sitemap_store_state_file)) if sitemap_store_state.get(cache_options_key) == sitemap_last_mod: # sitemap hasn't changed since the last time continue logging.info("scanning " + sitemap + "...") # Load the sitemap for this year & collection, and loop through each document. for package_name, lastmod in get_sitemap_entries(sitemap): # Add this package to the download list. file_list = [] if not options.get("granules", False): # Doing top-level package files (granule==None). file_list.append(None) else: # In some collections, like STATUTE, each document has subparts which are not # described in the sitemap. Load the main HTML page and scrape for the sub-files. # In the STATUTE collection, the MODS information in granules is redundant with # information in the top-level package MODS file. But the only way to get granule- # level PDFs is to go through the granules. content_detail_url = "http://www.gpo.gov/fdsys/pkg/%s/content-detail.html" % package_name content_index = utils.download(content_detail_url, "fdsys/package/%s/%s/%s.html" % (year, collection, package_name), utils.merge(options, { 'binary': True, })) if not content_index: raise Exception("Failed to download %s" % content_detail_url) for link in html.fromstring(content_index).cssselect("table.page-details-data-table td.rightLinkCell a"): if link.text == "More": m = re.match("granule/(.*)/(.*)/content-detail.html", link.get("href")) if not m or m.group(1) != package_name: raise Exception("Unmatched granule URL %s" % link.get("href")) granule_name = m.group(2) file_list.append(granule_name) # Download the files of the desired types.
for granule_name in file_list: mirror_package(year, collection, package_name, lastmod, granule_name, file_types, options) # If we got this far, we successfully downloaded all of the files in this year/collection. # To speed up future updates, save the lastmod time of this sitemap in a file indicating # what we downloaded. The store-state file contains a JSON mapping of command line options # to the most recent lastmod value for this sitemap. sitemap_store_state = { } if os.path.exists(sitemap_store_state_file): sitemap_store_state = json.load(open(sitemap_store_state_file)) sitemap_store_state[cache_options_key] = sitemap_last_mod json.dump(sitemap_store_state, open(sitemap_store_state_file, "w"))
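Putting the two functions together, a mirroring run could look roughly like the sketch below. The option keys are the ones actually read above (store, year, congress, granules, cached, force); the driver itself is only an assumed example, and update_sitemap_cache must have been run first, as the docstring notes:

# Hypothetical driver; assumes update_sitemap_cache() has already populated
# utils.cache_dir() + "/fdsys/sitemap/<year>/<collection>.xml".
options = {
    "store": "mods,pdf,text",  # comma-separated file types to download
    "year": "2013",            # only process sitemaps for this year
    "granules": False,         # top-level package files only (granule == None)
    "cached": False,           # allow fresh downloads rather than cache-only
}
# Restrict the run to two collections; pass None to mirror everything.
mirror_packages({"BILLS", "STATUTE"}, options)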
def main(_): pp.pprint(flags.FLAGS.__flags) if not os.path.exists(FLAGS.checkpoint_dir): os.makedirs(FLAGS.checkpoint_dir) if not os.path.exists(FLAGS.sample_dir): os.makedirs(FLAGS.sample_dir) with tf.Session() as sess: if FLAGS.dataset == 'mnist': dcgan = DCGAN(sess, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size, y_dim=10, dataset_name=FLAGS.dataset, is_crop=FLAGS.is_crop, checkpoint_dir=FLAGS.checkpoint_dir) else: dcgan = DCGAN(sess, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size, dataset_name=FLAGS.dataset, is_crop=FLAGS.is_crop, checkpoint_dir=FLAGS.checkpoint_dir) if FLAGS.is_train: dcgan.train(FLAGS) else: dcgan.load(FLAGS.checkpoint_dir) to_json("./web/js/layers.js", [dcgan.h0_w, dcgan.h0_b, dcgan.g_bn0], [dcgan.h1_w, dcgan.h1_b, dcgan.g_bn1], [dcgan.h2_w, dcgan.h2_b, dcgan.g_bn2], [dcgan.h3_w, dcgan.h3_b, dcgan.g_bn3], [dcgan.h4_w, dcgan.h4_b, None]) # Below is codes for visualization OPTION = 2 if OPTION == 0: z_sample = np.random.uniform(-0.5, 0.5, size=(FLAGS.batch_size, dcgan.z_dim)) samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) save_images(samples, [8, 8], './samples/test_%s.png' % strftime("%Y-%m-%d %H:%M:%S", gmtime())) elif OPTION == 1: values = np.arange(0, 1, 1./FLAGS.batch_size) for idx in xrange(100): print(" [*] %d" % idx) z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim]) for kdx, z in enumerate(z_sample): z[idx] = values[kdx] samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) save_images(samples, [8, 8], './samples/test_arange_%s.png' % (idx)) elif OPTION == 2: values = np.arange(0, 1, 1./FLAGS.batch_size) for idx in [random.randint(0, 99) for _ in xrange(100)]: print(" [*] %d" % idx) z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim)) z_sample = np.tile(z, (FLAGS.batch_size, 1)) #z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim]) for kdx, z in enumerate(z_sample): z[idx] = values[kdx] samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) make_gif(samples, './samples/test_gif_%s.gif' % (idx)) elif OPTION == 3: values = np.arange(0, 1, 1./FLAGS.batch_size) for idx in xrange(100): print(" [*] %d" % idx) z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim]) for kdx, z in enumerate(z_sample): z[idx] = values[kdx] samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) make_gif(samples, './samples/test_gif_%s.gif' % (idx)) elif OPTION == 4: image_set = [] values = np.arange(0, 1, 1./FLAGS.batch_size) for idx in xrange(100): print(" [*] %d" % idx) z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim]) for kdx, z in enumerate(z_sample): z[idx] = values[kdx] image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})) make_gif(image_set[-1], './samples/test_gif_%s.gif' % (idx)) new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 10]) for idx in range(64) + range(63, -1, -1)] make_gif(new_image_set, './samples/test_gif_merged.gif', duration=8) elif OPTION == 5: image_set = [] values = np.arange(0, 1, 1./FLAGS.batch_size) z_idx = [[random.randint(0,99) for _ in xrange(5)] for _ in xrange(200)] for idx in xrange(200): print(" [*] %d" % idx) #z_sample = np.zeros([FLAGS.batch_size, dcgan.z_dim]) z = np.random.uniform(-1e-1, 1e-1, size=(dcgan.z_dim)) z_sample = np.tile(z, (FLAGS.batch_size, 1)) for kdx, z in enumerate(z_sample): for jdx in xrange(5): z_sample[kdx][z_idx[idx][jdx]] = values[kdx] image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})) make_gif(image_set[-1], './samples/test_gif_%s.gif' % (idx)) new_image_set = 
[merge(np.array([images[idx] for images in image_set]), [10, 20]) for idx in range(64) + range(63, -1, -1)] make_gif(new_image_set, './samples/test_gif_random_merged.gif', duration=4) elif OPTION == 6: image_set = [] values = np.arange(0, 1, 1.0/FLAGS.batch_size).tolist() z_idx = [[random.randint(0,99) for _ in xrange(10)] for _ in xrange(100)] for idx in xrange(100): print(" [*] %d" % idx) z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim)) z_sample = np.tile(z, (FLAGS.batch_size, 1)) for kdx, z in enumerate(z_sample): for jdx in xrange(10): z_sample[kdx][z_idx[idx][jdx]] = values[kdx] image_set.append(sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample})) save_images(image_set[-1], [8, 8], './samples/test_random_arange_%s.png' % (idx)) new_image_set = [merge(np.array([images[idx] for images in image_set]), [10, 10]) for idx in range(64) + range(63, -1, -1)] make_gif(new_image_set, './samples/test_gif_merged_random.gif', duration=4) elif OPTION == 7: for _ in xrange(50): z_idx = [[random.randint(0,99) for _ in xrange(10)] for _ in xrange(8)] zs = [] for idx in xrange(8): z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim)) zs.append(np.tile(z, (8, 1))) z_sample = np.concatenate(zs) values = np.arange(0, 1, 1/8.) for idx in xrange(FLAGS.batch_size): for jdx in xrange(8): z_sample[idx][z_idx[idx/8][jdx]] = values[idx%8] samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) save_images(samples, [8, 8], './samples/multiple_testt_%s.png' % strftime("%Y-%m-%d %H:%M:%S", gmtime())) elif OPTION == 8: counter = 0 for _ in xrange(50): import scipy.misc z_idx = [[random.randint(0,99) for _ in xrange(10)] for _ in xrange(8)] zs = [] for idx in xrange(8): z = np.random.uniform(-0.2, 0.2, size=(dcgan.z_dim)) zs.append(np.tile(z, (8, 1))) z_sample = np.concatenate(zs) values = np.arange(0, 1, 1/8.) for idx in xrange(FLAGS.batch_size): for jdx in xrange(8): z_sample[idx][z_idx[idx/8][jdx]] = values[idx%8] samples = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}) for sample in samples: scipy.misc.imsave('./samples/turing/%s.png' % counter, sample) counter += 1 else: import scipy.misc from glob import glob samples = [] fnames = glob("/Users/carpedm20/Downloads/x/1/*.png") fnames = sorted(fnames, key = lambda x: int(x.split("_")[1]) * 10000 + int(x.split('_')[2].split(".")[0])) for f in fnames: samples.append(scipy.misc.imread(f)) make_gif(samples, './samples/training.gif', duration=8, true_image=True)
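Several of the visualization branches above call merge(images, size) to tile a batch of generated samples into one grid image before saving or animating it. A minimal NumPy sketch of that kind of grid-tiling helper, assuming images arrive as an (N, h, w, c) array and size is (rows, cols) with rows * cols >= N:

import numpy as np

# Sketch of a grid-tiling helper like the merge() used above.
def merge_grid(images, size):
    # images: (N, h, w, c) array of samples; size: (rows, cols) grid shape.
    rows, cols = size
    h, w = images.shape[1], images.shape[2]
    grid = np.zeros((h * rows, w * cols, images.shape[3]), dtype=images.dtype)
    for idx, image in enumerate(images):
        i = idx % cols   # column position in the grid
        j = idx // cols  # row position in the grid
        grid[j * h:(j + 1) * h, i * w:(i + 1) * w, :] = image
    return grid

# e.g. merge_grid(samples, (8, 8)) tiles a 64-image batch into one 8x8 sheet.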
def getappinfo(steamhome, logindata): librarydirs = {} librarydirs[steamhome["path"]] = True try: for steamapp in utils.steamapps: libraryvdf = utils.vdf.load(open(os.path.join(steamhome["path"],steamapp,"libraryfolders.vdf"))) if libraryvdf is not None and "LibraryFolders" in libraryvdf: for folder in libraryvdf["LibraryFolders"]: librarydirs[libraryvdf["LibraryFolders"][folder]] = True except: pass def checkinstalled(direxe): for lib in librarydirs: for steamapp in utils.steamapps: exe = os.path.join(lib,steamapp,"common",direxe) if os.path.exists(exe): return True return None appinfo = {} try: appinfofile = open(os.path.join(steamhome["path"],'appcache','appinfo.vdf'), 'rb') appinfodata = appinfofile.read().decode("ISO-8859-1") appinfofile.close() games = re.compile("\x02\x00common").split(appinfodata) appmatch = re.compile("\x02gameid\x00([\s\S]{3})") fsplit = re.compile("\x00[\x01\x02]") esplit = re.compile("(?=\x00\x01executable(?=\x00))") for game in games: gameid = appmatch.search(game) if gameid and gameid.group(1): appid = utils.getnum(gameid.group(1)) fielddict = {} fields = game.split(chr(1)) for field in fields: vals = field.split(chr(0)) if vals is not None and vals[0] is not None and vals[0] not in fielddict: try: fielddict[vals[0]] = vals[1] except: pass # probably an invalid key - ignore it appinfo[appid] = { "name": fielddict["name"].encode("ISO-8859-1"), "type": "" } if "installdir" in fielddict: appinfo[appid]["path"] = fielddict["installdir"] if "dlcappid" in fielddict: appinfo[appid]["isdlc"] = True if "type" in fielddict: appinfo[appid]["type"] = fielddict["type"] if fielddict["type"].upper() == "DLC": appinfo[appid]["isdlc"] = True if "logo" in fielddict: appinfo[appid]["logo"] = "http://cdn.akamai.steamstatic.com/steamcommunity/public/images/apps/" + str(appid) + "/" + fielddict["logo"] + ".jpg" if "installdir" in fielddict: exes = utils.zerowidthsplit(esplit, game) exes.pop(0) # discard the leading bit for e in exes: exedata={} exefields = fsplit.split(e) for ef in exefields: exevals = ef.split(chr(0)) if exevals[0] is not None and exevals[0] not in exedata: try: exedata[exevals[0]] = exevals[1] except: pass # probably an invalid key - ignore it if ("oslist" not in exedata or exedata["oslist"] == steamhome["platform"]) and ("osarch" not in exedata or exedata["osarch"] == steamhome["platarch"]): if os.path.isabs(exedata["executable"]): exedata["executable"] = exedata["executable"][1:] appinfo[appid]["exe"] = exedata["executable"] if "installdir" in fielddict and "executable" in exedata: appinfo[appid]["foundat"] = os.path.join(fielddict["installdir"],exedata["executable"]) if "CheckGuid" in fielddict: appinfo[appid]["guid"] = fielddict["CheckGuid"] if "checkguids" in fielddict: appinfo[appid]["guids"] = fielddict["checkguids"] break except: pass
packageinfo = {} try: packageinfofile = open(os.path.join(steamhome["path"],'appcache','packageinfo.vdf'), 'rb') packageinfodata = packageinfofile.read().decode("ISO-8859-1") packageinfofile.close() pmatch = re.compile("PackageID\x00([\s\S]{4})\x02") ematch = re.compile("\x02ExpiryTime\x00([\s\S]{4})") amatch = re.compile("\x02.*?\x00([\s\S]{3})\x00") packages = utils.zerowidthsplit(re.compile("(?=\x00appids)"),packageinfodata) for package in packages: pkgmatch = pmatch.search(package) if pkgmatch is not None: pkgid = utils.getnum(pkgmatch.group(1)) packageinfo[pkgid] = {"name": "unknown", "appids": {}} try: expmatch = ematch.search(package) if expmatch is not None: exptime = int(utils.getnum(expmatch.group(1))) if exptime is not None and exptime < time.time(): packageinfo[pkgid]["expired"] = True except (RuntimeError, TypeError, NameError): pass appidkey = package[8:] while appidkey[0:1] == chr(2): appmatch = amatch.match(appidkey) if appmatch is not None: appid = utils.getnum(appmatch.group(1)) packageinfo[pkgid]["appids"][appid] = True appidkey = appidkey[len(appmatch.group(0)):] except: pass gamedb = {} for package in logindata["configdata"]["UserLocalConfigStore"]["Licenses"]: if package in packageinfo and "expired" not in packageinfo[package] and "appids" in packageinfo[package]: for appid in packageinfo[package]["appids"]: if appid in appinfo: if appid not in gamedb: gamedb[appid] = { "name": appinfo[appid]["name"] } if "logo" in appinfo[appid]: gamedb[appid]["gridimage"] = appinfo[appid]["logo"] if "data-packageid" not in gamedb[appid] or int(gamedb[appid]["data-packageid"]) < int(package): gamedb[appid]["data-packageid"] = package gamelist = {} conf = {} if "apps" in logindata["configdata"]["UserLocalConfigStore"]["Software"]["Valve"]["Steam"]: conf = logindata["configdata"]["UserLocalConfigStore"]["Software"]["Valve"]["Steam"]["apps"] for game in gamedb: if "type" in appinfo[game]: gametype = appinfo[game]["type"] if ("data-packageid" not in gamedb[game] or int(gamedb[game]["data-packageid"]) > 0) and (gametype == "" or gametype.upper() == "GAME" or gametype.upper() == "DLC"): confdata = {"LastPlayed": 0} if game in conf: confdata = utils.merge(confdata,conf[game]) tags = "" if "tags" in confdata: for tag in confdata["tags"]: tags += "::" + confdata["tags"][tag] if "hidden" in confdata: tags += "::hidden" dataattrs = { "data-categories": tags } if "LastPlayed" in confdata and int(confdata["LastPlayed"]) >= 1200000000: dataattrs["data-lastplayed"] = confdata["LastPlayed"] if game in appinfo: if "foundat" in appinfo[game] and appinfo[game]["foundat"] is not None and checkinstalled(appinfo[game]["foundat"]): dataattrs["data-exe"] = appinfo[game]["exe"] dataguids = "" if "guid" in appinfo[game]: dataguids = appinfo[game]["guid"] if "guids" in appinfo[game]: if dataguids: dataguids += ";" dataguids += appinfo[game]["guids"] if dataguids != "": dataattrs["data-guids"] = dataguids if "isdlc" in appinfo[game]: dataattrs["data-isdlc"] = True gamelist[game] = utils.merge(gamedb[game],dataattrs) return gamelist
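getappinfo leans on two utils helpers that are not shown here: getnum, which turns the short byte strings captured out of appinfo.vdf and packageinfo.vdf into integers, and zerowidthsplit, which splits a string on a zero-width lookahead pattern. The sketches below are assumptions about their behaviour (little-endian byte order for getnum, delimiter kept with the following chunk for zerowidthsplit), not the project's actual implementations:

# Assumed behaviour of utils.getnum: the captured characters were decoded from raw
# bytes via ISO-8859-1 above, so each character maps back to one byte; interpret
# them as a little-endian unsigned integer.
def getnum_sketch(raw):
    value = 0
    for i, ch in enumerate(raw):
        value += ord(ch) << (8 * i)
    return value

# Assumed behaviour of utils.zerowidthsplit: split text at every zero-width match of
# a compiled lookahead pattern, keeping each match position at the start of the
# following chunk (the caller above discards the leading chunk with pop(0)).
def zerowidthsplit_sketch(pattern, text):
    starts = [m.start() for m in pattern.finditer(text)]
    bounds = [0] + starts + [len(text)]
    return [text[a:b] for a, b in zip(bounds, bounds[1:]) if a != b]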