def getIndexLocked(self, encQueueName): """Get contents of index file. Caller takes care of synchronization.""" found = false queues = self.listCrawlQueues() queueName = urllib.unquote(encQueueName) for queue in queues: if queueName == queue.queueName: found = true break if not found: logging.error('Queue %s not found' % encQueueName) return (C.CRAWLQUEUE_NAME_NOT_FOUND, 0, 0, None) if queue.completeState == C.CRAWLQUEUE_STATUS_PENDING: logging.error('Queue %s is incomplete.' % encQueueName) return (C.CRAWLQUEUE_INCOMPLETE, 0, 0, None) index_file = self.getCrawlQueueIndexFileName(encQueueName) try: fileContents = gfile.GFile(index_file, 'r').readlines() captionTime = int(fileContents[0][:-1]) numUrls = int(fileContents[1][:-1]) return (C.CRAWLQUEUE_OK, captionTime, numUrls, fileContents[2:]) except IOError, e: logging.error('Failed to get queue index file %s. IOError: %s' % \ (index_file, e)) return (C.CRAWLQUEUE_INTERNAL_ERROR, 0, 0, None)
def main(argv):
  """Entry point: prepare the work dir, bind gin overrides, visualize.

  Args:
    argv: Command-line arguments; only the program name is expected.

  Raises:
    app.UsageError: If extra command-line arguments are passed.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  xm.setup_work_unit()
  # Create the working directory if needed and snapshot the flags so the
  # run can be reproduced later.
  if not gfile.Exists(FLAGS.workdir):
    gfile.MakeDirs(FLAGS.workdir)
  utils.dump_flags_to_file(os.path.join(FLAGS.workdir, 'flags.txt'))
  # Visualize only the curiosity (surrogate) reward: zero out the task
  # reward and give the surrogate reward full weight.
  gin.bind_parameter('CuriosityEnvWrapper.scale_task_reward', 0.)
  gin.bind_parameter('CuriosityEnvWrapper.scale_surrogate_reward', 1.)
  gin.bind_parameter('AntWrapper.enable_die_condition',
                     FLAGS.ant_env_enable_die_condition)
  # Apply command-line gin bindings last so they win over the above.
  gin.parse_config_files_and_bindings(None, FLAGS.gin_bindings)
  # Hardware crashes with:
  # Failed to open library!
  # dlopen: cannot load any more object with static TLS
  FLAGS.renderer = 'software'
  work_unit = None
  # xm_xid == -1 means we are not running under XManager.
  if FLAGS.xm_xid != -1:
    work_unit = xmanager_api.XManagerApi().get_current_work_unit()
  visualize_curiosity_reward(work_unit)
  # Record the fully-resolved gin config next to the results.
  with gfile.GFile(os.path.join(FLAGS.workdir, 'gin_config.txt'), 'w') as f:
    f.write(gin.operative_config_str())
def getReport(self, collection, reportName):
  """Return body of a summary report.

  Args:
    collection: Collection the report belongs to.
    reportName: Name of the summary report to fetch.

  Returns:
    A tuple (status, reportString, contents) where status is a C.REPORT_*
    code; reportString is report.toString() when the report was found,
    else None; contents is the report HTML only on C.REPORT_OK.
  """
  # Serialize against other report-list operations.
  self.logreplock.acquire()
  try:
    reports = self.getLogReports(collection, liblog.SUMMARY_REPORT)
    found = false
    incomplete = false
    for report in reports:
      if report.reportName == reportName:
        found = true
        # A found report counts as incomplete unless it is in one of the
        # two terminal states.
        if (report.completeState != COMPLETE and
            report.completeState != COMPLETE_REGENERATE):
          incomplete = true
        # Stop at the first match; `report` is reused below.
        break
    if not found:
      logging.error('Report %s not found' % reportName)
      return (C.REPORT_NAME_NOT_FOUND, None, None)
    elif incomplete:
      logging.error('Report %s is incomplete' % reportName)
      return (C.REPORT_INCOMPLETE, report.toString(), None)
    (html_file, _) = liblog.get_report_filenames(self.entConfig,
                                                 liblog.SUMMARY_REPORT,
                                                 reportName, collection)
    try:
      reportContents = gfile.GFile(html_file, 'r').read()
    except IOError:
      return (C.REPORT_INTERNAL_ERROR, report.toString(), None)
  finally:
    # Released before the success return below; the early returns above
    # also pass through this finally.
    self.logreplock.release()
  return (C.REPORT_OK, report.toString(), reportContents)
def __init__(self, encoder, decoder, mixer, embed_path, config,
             model="baseline"):
  """Initializes your System.

  :param encoder: an encoder that you constructed in train.py
  :param decoder: a decoder that you constructed in train.py
  :param mixer: mixer component combining encoder outputs
  :param embed_path: path to the pretrained "glove" embeddings (.npz-like
      archive with a "glove" key)
  :param config: configuration object; output_size and max_word_length
      are read here
  :param model: model variant name (default "baseline")
  """
  self.encoder = encoder
  self.mixer = mixer
  self.decoder = decoder
  self.config = config
  # Load pretrained embeddings; inside google3 the file must be opened
  # through gfile, outside a plain path works.
  if GOOGLE3:
    self.pretrained_embeddings = np.load(
        gfile.GFile(embed_path))["glove"]
  else:
    self.pretrained_embeddings = np.load(embed_path)["glove"]
  # Rows 0-2 are reserved for special tokens; the remaining rows are the
  # ordinary word vectors.
  self.pretrained_embeddings_special_tokens = self.pretrained_embeddings[
      0:3]
  self.pretrained_embeddings_words = self.pretrained_embeddings[3:]
  self.model = model

  # ==== set up placeholder tokens ========
  # (batch, max question length in words)
  self.question_placeholder = tf.placeholder(tf.int32, shape=(None, None))
  self.questions_lengths_placeholder = tf.placeholder(tf.int32,
                                                      shape=(None))
  # (batch, config.output_size): context is padded to a fixed length.
  self.context_placeholder = tf.placeholder(
      tf.int32, shape=(None, self.config.output_size))
  self.context_lengths_placeholder = tf.placeholder(tf.int32, shape=(None))
  # (batch, 2) — presumably answer start/end indices; TODO confirm.
  self.answers_numeric_list = tf.placeholder(tf.int32, shape=(None, 2))
  self.dropout_placeholder = tf.placeholder(tf.float32, shape=())
  # context character embedding: batch, max context size in words, max_word_length
  self.context_tokens_placeholder = tf.placeholder(
      tf.int32, shape=[None, None, self.config.max_word_length])
  # question character embedding: batch, max question size in words, max_word_length
  self.question_tokens_placeholder = tf.placeholder(
      tf.int32, shape=[None, None, self.config.max_word_length])

  # ==== assemble pieces ====
  with tf.variable_scope(
      "qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
    self.setup_embeddings()
    self.setup_system()
    self.setup_loss()
    self.setup_train_op()

  # ==== set up training/updating procedure ====
  self.saver = tf.train.Saver()
def setCrawlQueuesLocked(self, queues): """Set the file content for list of queues of given queueType on given collection.""" try: gfile.GFile(self.getCrawlQueueListFileName(), 'w').write( string.join(map(lambda(x) : x.toString(), queues), '\n')) except Exception, e: logging.error('Cannot write CrawlQueue list. %s' % e) return false
def getstatus(self, filename): '''Return the first line of a feed status file.''' try: dirname = self.cfg.getGlobalParam('FEED_STATUS_DIR') filename = os.path.join(dirname, filename) # read only the first line, the rest of the file may be too big # see bug 76929 out = gfile.GFile(filename).readline() except IOError, e: logging.error(str(e)) return "1"
def makeValid(valid_file, logs):
  """Make a validate file out of a list of Log objects.

  Each line records a log's file name and size, space-separated.
  Returns 1 on success, 0 if the file could not be written.
  """
  try:
    lines = ['%s %d\n' % (entry.file, entry.size) for entry in logs]
    fp = gfile.GFile(valid_file, 'w')
    fp.writelines(lines)
    fp.close()
  except:
    logging.error('Error writing validation file %s' % valid_file)
    return 0
  return 1
def _LoadConfigFile(config_path):
  """Converts Json config file into a dict.

  Args:
    config_path: Path to config file.

  Returns:
    Returns dict representation of the config file.
  """
  with gfile.GFile(config_path, 'r') as fp:
    return json.load(fp)
def ReadFile(filename, startFpos, endFpos): """Read lines from file @filename, from position @startFpos to @endFpos.""" try: fp = gfile.GFile(filename, 'r') fp.seek(startFpos) buffer = [] while fp.tell() < endFpos: buffer.append(fp.readline()) fp.close() return (C.CRAWLQUEUE_OK, buffer) except IOError, e: logging.error('Failed to read file %s. IOError: %s' % (filename, e)) return (C.CRAWLQUEUE_INTERNAL_ERROR, None)
def setLogReports(self, reportType, collection, reports):
  """Set the file content for list of reports of given reportType on
  given collection.

  Args:
    reportType: Type of the reports being written.
    collection: Collection the reports belong to.
    reports: List of report objects serializable via ReportToString.

  Returns:
    true on success, false on failure.
  """
  try:
    listfile = liblog.get_report_list_filename(self.entConfig,
                                               reportType, collection)
    f = gfile.GFile(listfile, 'w')
    try:
      f.write(string.join(map(ReportToString, reports), '\n'))
    finally:
      # Close explicitly so the content is flushed before returning
      # success; the original never closed the handle.
      f.close()
  except IOError:
    logging.error('Cannot write new LogReport')
    return false
  return true
def listCrawlQueues(self): """Return a list of crawl queues.""" filename = self.getCrawlQueueListFileName() self.cqueuelock.acquire() try: try: lines = gfile.GFile(filename, 'r').readlines() except IOError, e: logging.error('Failed to read crawlqueue list. IOError: %s.' % e) return [] queues = [] for line in lines: try: queues.append(StringToCrawlQueueForm(line)) except ValueError, e: logging.error('Fail to parse one line: [%s]' % line)
def readValidFile(valid_file):
  """Read the file's valid file to get a list of file checkpoints.

  Args:
    valid_file: Path to the validation file; each line is "<file> <size>".

  Returns:
    Dict mapping file name -> size, or None if the file can't be read.
    Malformed lines are logged and skipped.
  """
  checkpoints = {}
  try:
    f = gfile.GFile(valid_file, 'r')
    try:
      lines = f.readlines()
    finally:
      f.close()
  except Exception:
    # Narrowed from a bare except: (which also swallowed
    # KeyboardInterrupt/SystemExit in Python 2).
    logging.error('Can\'t open %s' % valid_file)
    return None
  for line in lines:
    try:
      # ValueError covers both a wrong field count and a non-numeric size.
      name, size_s = string.split(line)
      size = int(size_s)
    except ValueError:
      logging.error('Invalid line in validation file %s: %s' %
                    (valid_file, line))
      continue
    checkpoints[name] = size
  return checkpoints
def getLogReports(self, collection, reportType):
  """Return a list of reports of given reportType on given collection.

  Args:
    collection: Collection whose report list file to read.
    reportType: Type of reports to list.

  Returns:
    List of LogReport objects; [] if the list file is unreadable or any
    line fails to parse.
  """
  listFile = liblog.get_report_list_filename(self.entConfig,
                                             reportType, collection)
  reports = []
  try:
    lines = gfile.GFile(listFile, 'r').readlines()
    for line in lines:
      # Strip the trailing newline, if present.
      if line[-1] == '\n':
        line = line[:-1]
      # Each line has 10 tab-separated fields.  maxsplit=9 keeps any
      # tabs inside the final diagnosticTerms field intact.
      # NOTE(review): this unpack rebinds the `collection` and
      # `reportType` parameters with per-line values.
      (reportName, collection, creationDate, isFinal, reportType,
       reportDate, completeState, withResults, topCount,
       diagnosticTerms) = string.split(line, '\t', 9)
      reports.append(LogReport(urllib.unquote(reportName), collection,
                               creationDate, isFinal, reportType,
                               reportDate, completeState, withResults,
                               topCount, diagnosticTerms))
  except IOError:
    # Missing or unreadable list file is treated as "no reports".
    return []
  except ValueError:
    # A malformed line discards the entire list, not just that line.
    return []
  return reports
def WriteResult(cq_mixer):
  """Post process the CrawlQueueResponse buffer and write to file for
  adminrunner to use.

  Writes two files: the result file (one tab-separated line per url,
  grouped by host) and the index file (caption time, total size, then one
  line per host with the byte offset of each page boundary).

  Args:
    cq_mixer: Mixer holding perHostUrlSorters plus the result/index file
      names, caption time and total size.

  Returns:
    false on any exception; no explicit value (None) on success.
  """
  try:
    rfile = gfile.GFile(cq_mixer.result_file, 'w')
    ifile = gfile.GFile(cq_mixer.index_file, 'w')
    index_buf = []
    result_buf = []
    # NOTE(review): `queues` is never used below.
    queues = {}
    # write to data file and index file.
    # Hosts are processed in sorted order so the files are deterministic.
    hosts = cq_mixer.perHostUrlSorters.keys()
    hosts.sort()
    for host in hosts:
      urlSorter = cq_mixer.perHostUrlSorters[host]
      count = 0
      # fpos manually tracks the result-file offset; it must stay in sync
      # with the bytes appended to result_buf below.
      fpos = rfile.tell()
      index_line = '%s\t%d\t%d' % (host, urlSorter.size(), fpos)
      for url in urlSorter.getUrls():
        # Optional protobuf-style fields get defaults when absent.
        if url.has_path():
          path = url.path()
        else:
          path = ''
        if url.has_pagerank():
          pagerank = url.pagerank()
        else:
          pagerank = -1
        if url.has_lastcrawledtime():
          lastcrawledtime = url.lastcrawledtime()
        else:
          lastcrawledtime = 0
        if url.has_nextcrawltime():
          nextcrawltime = url.nextcrawltime()
        else:
          nextcrawltime = 0
        if url.has_changeinterval():
          changeinterval = url.changeinterval()
        else:
          changeinterval = 0
        # Line format should be consistent with that in CrawlingUrl.java
        line = '%d\t%d\t%d\t%d\t%s\n' % (pagerank, lastcrawledtime,
                                         nextcrawltime, changeinterval,
                                         path)
        fpos += len(line)
        result_buf.append(line)
        count = count + 1
        # Record a page-boundary offset every PAGESIZE urls.
        if count % crawlqueue_manager.PAGESIZE == 0:
          index_line = '%s\t%d' % (index_line, fpos)
        if count % 1000 == 0:
          # flush the buffer
          rfile.writelines(result_buf)
          result_buf = []
      # post-processing one per-host urlSorter.
      if len(result_buf) != 0:
        rfile.writelines(result_buf)
        result_buf = []
      index_line = '%s\t%d\n' % (index_line, fpos)
      index_buf.append(index_line)
    # write index file.
    # Header lines: caption time first, total url count second.
    index_buf.insert(0, '%d\n' % cq_mixer.getCaptionTime())
    index_buf.insert(1, '%d\n' % cq_mixer.size())
    ifile.writelines(index_buf)
    ifile.close()
    rfile.close()
  except Exception, e:
    logging.error('Exception: %s' % e)
    return false