def test_median(self):
    """Check the median calculation"""
    data = [8, 1, 5, 3, 4, 2, 6, 7, 1, 9]
    result = median(data)
    self.assertEqual(result, 4.5)
    data.pop(0)
    result = median(data)
    self.assertEqual(result, 5.0)
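# --- Illustrative sketch (not part of the original sources) ---
# A minimal median() consistent with the expectations in the test above:
# sort the values and take the middle element, averaging the two middle
# elements when the count is even.
def median(values):
    """Return the median of a non-empty sequence of numbers."""
    ordered = sorted(values)
    n = len(ordered)
    mid = n // 2
    if n % 2 == 1:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2.0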
def request_device_readings_quartiles(device_uuid):
    """
    This endpoint allows clients to GET the 1st and 3rd quartile
    sensor reading value for a device.

    Mandatory Query Parameters:
    * type -> The type of sensor value a client is looking for
    * start -> The epoch start time for a sensor being created
    * end -> The epoch end time for a sensor being created
    """
    if request.data:
        post_data = json.loads(request.data)
        type = post_data.get('type', None)
        if not type or type not in ('temperature', 'humidity'):
            return 'error on the required type data', 400
        start = post_data.get('start', None)
        if not start:
            return 'error on the required start data', 400
        end = post_data.get('end', None)
        if not end:
            return 'error on the required end data', 400
    else:
        return 'missing data in the request parameters', 400

    # Set the db that we want and open the connection
    if app.config['TESTING']:
        conn = sqlite3.connect('test_database.db')
    else:
        conn = sqlite3.connect('database.db')
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()

    sql = 'SELECT r.value from readings r WHERE r.type = ? AND r.device_uuid = ? AND r.date_created >= ? AND r.date_created <= ?'
    params = [type, device_uuid, start, end]
    sql += ' ORDER BY r.value'

    # Execute the query
    cur.execute(sql, params)
    rows = [row[0] for row in cur.fetchall()]

    mid = len(rows) // 2
    if len(rows) % 2 == 0:  # even
        lowerQ = median(rows[:mid])
        upperQ = median(rows[mid:])
    else:  # odd
        lowerQ = median(rows[:mid])  # same as even
        upperQ = median(rows[mid + 1:])

    return str(lowerQ) + "," + str(upperQ), 200
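# --- Illustrative sketch (not part of the original sources) ---
# The endpoint above computes quartiles as the median of each half of the
# sorted readings, dropping the middle reading when the count is odd.
# Assuming the simple sort-and-average median() sketched earlier:
readings = [1, 2, 3, 4, 5, 6, 7, 8]        # even count
mid = len(readings) // 2                   # 4
lower_q = median(readings[:mid])           # median([1, 2, 3, 4]) == 2.5
upper_q = median(readings[mid:])           # median([5, 6, 7, 8]) == 6.5

readings = [1, 2, 3, 4, 5, 6, 7, 8, 9]     # odd count: skip the middle value
mid = len(readings) // 2                   # 4
lower_q = median(readings[:mid])           # median([1, 2, 3, 4]) == 2.5
upper_q = median(readings[mid + 1:])       # median([6, 7, 8, 9]) == 7.5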
def CrossValidation(self, cv_method=0, **args): """Select ncomp by the requested CV method""" validation = self.model["validation"].AsDataFrame() # method 0: select the fewest components with PRESS within 1 stdev of the least PRESS (by the bootstrap) if cv_method == 0: # Use the bootstrap to find the standard deviation of the MSEP # Get the leave-one-out CV error from R: columns = min(self.num_predictors, self.ncomp_max) cv = array.array("d", validation["pred"].AsVector()) rows = len(cv) / columns cc = [] for k in range(int(columns)): b = k * rows e = b + rows cc.append(array.array("d", cv[b:e])) cv = cc # PRESS = map(lambda x: sum((cv[:,x]-self.array_actual)**2), range(cv.shape[1])) PRESS = [sum([(cv[i][j] - self.actual[j]) ** 2 for j in range(rows)]) for i in range(int(columns))] # ncomp = np.argmin(PRESS) ncomp = [i for i in range(len(PRESS)) if PRESS[i] == min(PRESS)][0] # cv_squared_error = (cv[:,ncomp]-self.array_actual)**2 cv_squared_error = [(cv[ncomp][j] - self.actual[j]) ** 2 for j in range(int(rows))] sample_space = xrange(rows) PRESS_stdev = list() # Cache random number generator and int's constructor for a speed boost _random, _int = random.random, int for i in range(100): PRESS_bootstrap = list() for j in range(100): PRESS_bootstrap.append(sum([cv_squared_error[_int(_random() * rows)] for i in sample_space])) PRESS_stdev.append(utils.std(PRESS_bootstrap)) med_stdev = utils.median(PRESS_stdev) # Maximum allowable PRESS is the minimum plus one standard deviation good_ncomp = [i for i in range(len(PRESS)) if PRESS[i] < min(PRESS) + med_stdev] self.ncomp = int(min(good_ncomp) + 1) # method 1: select the fewest components w/ PRESS less than the minimum plus a 4% of the range if cv_method == 1: # PRESS stands for predicted error sum of squares PRESS0 = validation["PRESS0"][0] PRESS = list(validation["PRESS"]) # the range is the difference between the greatest and least PRESS values PRESS_range = abs(PRESS0 - min(PRESS)) # Maximum allowable PRESS is the minimum plus a fraction of the range. max_CV_error = min(PRESS) + PRESS_range / 25 good_ncomp = [i for i in range(len(PRESS)) if PRESS[i] < max_CV_error] # choose the most parsimonious model that satisfies that criterion self.ncomp = int(min(good_ncomp) + 1)
def results_metrics():
    """
    Aggregator for all student results data
    :return: dictionary for all student data
    """
    quizzes = Quiz.objects.all()
    metrics = {}
    for quiz in quizzes:
        results = Results.objects.filter(quiz=quiz)
        quiz_json = json.loads(quiz.quizjson)
        scores = []
        single_metrics = {}
        for result in results:
            print result
            scores.append(result.score)
        print scores
        single_metrics['scores'] = scores
        single_metrics['name'] = quiz.name
        single_metrics['num_of_questions'] = len(quiz_json['questions'])
        mean = utils.average(scores)
        single_metrics['class_av'] = mean
        single_metrics['std_dev'] = utils.std_deviation(scores, mean)
        single_metrics['subject'] = quiz.subject
        single_metrics['high'] = max(scores)
        single_metrics['low'] = min(scores)
        single_metrics['class_median'] = utils.median(scores)
        metrics[quiz.name] = single_metrics
    return metrics
def printlevelstat(self, level, n=0):
    med = median((i.nleaves for i in level.itervalues()))
    self.logger.debug(output.green(' ' * n + "MED %f nleaves=%d len(k)=%d depth=%d"),
                      med, level.nleaves, len(level.keys()), level.depth)
    for k, v in level.iteritems():
        nleaves = v.nleaves
        depth = v.depth
        if v and v.clusterable:
            self.logger.debug(
                output.yellow(' ' * n + "K %s nleaves=%d r=%.2f depth=%d"),
                k, nleaves, float(nleaves) / med, depth)
        else:
            self.logger.debug(
                output.green(' ' * n + "K %s nleaves=%d r=%.2f depth=%d"),
                k, nleaves, float(nleaves) / med, depth)
        if v:
            self.printlevelstat(v, n + 1)
def make_plan(self, state):
    curr_state = copy.deepcopy(state)
    if self.active_goal is None:
        self.active_goal = self.uncompleted_goals[0]
    problem = self.services.problem_generator.generate_problem(
        self.active_goal, curr_state)
    self.plan = self.services.planner(self.services.pddl.domain_path, problem)
    for i in range(len(self.plan)):
        action = self.plan[i]
        curr_state_hash = encode_state(curr_state)
        weight = float(i + 1) / len(self.plan)
        if self.weights[curr_state_hash][action.lower()] < weight:
            self.weights[curr_state_hash][action.lower()] = weight
        curr_state = my_apply_action_to_state(curr_state, action,
                                              self.services.parser)
    local_weights = list()
    for state_hash in self.weights:
        vals = list(self.weights[state_hash].values())
        local_weights.extend(vals)
    self.state_recurrence_punish = median(local_weights)
    self.lookahead = min([4, int(len(self.plan) / 2)])
def run_test(self, test_cmd):
###############################################################################
    if self._cd:
        test_path, test_exe = os.path.split(test_cmd)
        test_path = None if not test_path else test_path
    else:
        test_exe = test_cmd
        test_path = None

    self.machine_specific_init(self._scaling_exp.threads)
    self.test_specific_init(test_exe, self._scaling_exp.threads)

    cmd = self.formulate_cmd(test_exe)

    results = []
    with open("{}.perf.log".format(os.path.split(test_exe)[1].split(" ")[0]),
              "w", encoding="utf-8") as fd:
        fd.write(cmd + "\n\n")
        fd.write("ENV: \n{}\n\n".format(run_cmd_no_fail("env")))
        for _ in range(self._num_runs):
            output = run_cmd_no_fail(
                cmd, from_dir=test_path,
                verbose=(not self._plot_friendly or self._verbose))
            fd.write(output + "\n\n")
            results.append(self.get_time(output))
            threads = self.get_threads(output)

    return median(results), threads
def build(points):
    if len(points) == 1:
        return Node(None, None, points[0])
    else:
        points = sorted(points, key=lambda x: x[1])
        # Note: median() is used as an index below, so it is assumed to
        # return the position of the median element rather than its value.
        m_idx = median(points)
        v_left = Tree.build(points[:m_idx + 1])
        v_right = Tree.build(points[m_idx + 1:])
        v_val = points[m_idx]
        return Node(v_left, v_right, v_val)
def clock_gets(number=100):
    times = []
    for i in xrange(number):
        start = clock()
        x = get_message()
        time_taken = clock() - start
        times.append(time_taken)
    median_time = median(times)
    mean_time = mean(times)
    stddev_time = stddev(times)
    return (median_time, mean_time, stddev_time)
def robust_normal_generator(self, x, relative):
    if relative:
        raise KeyError("robust_gaussian is not meaningful when 'relative' is True.")
    else:
        return tf.random.truncated_normal(
            # TODO Should be truncated by min and max, not 2x std
            array_ops.shape(x),
            mean=median(x),
            stddev=iqr(x),
            dtype=tf.dtypes.float32,
            seed=None,
            name="robust_normal_noise_generator"
        )
def calc_summary_of_window(window: list, win_count: int) -> Text:
    """
    Compute all summary statistics for a window.

    :param win_count: window counter
    :param window: list of values (the window)
    :return:
    """
    v_min = min(window)
    v_max = max(window)
    v_avg = sum(window) / WINDOW_SIZE
    v_median = median(window)
    return ('window: ' + str(win_count),
            'Max: ' + str(v_max) +
            '; Min: ' + str(v_min) +
            '; Avg: ' + str(v_avg) +
            '; Mdn: ' + str(v_median))
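# --- Hypothetical usage (not part of the original sources) ---
# WINDOW_SIZE is assumed to be a module-level constant equal to the window
# length (4 in this sketch); median() comes from the original module.
WINDOW_SIZE = 4
print(calc_summary_of_window([2, 4, 6, 8], win_count=1))
# ('window: 1', 'Max: 8; Min: 2; Avg: 5.0; Mdn: 5.0')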
def build(pts):
    if len(pts) == 1:
        return Node2D(None, None, pts[0], Tree(pts))
    else:
        pts = sorted(pts, key=lambda x: x[0])
        m_idx = median(pts)
        xSmallerOrEqual = pts[:m_idx + 1]
        xLarger = pts[m_idx + 1:]
        v_left = Tree2D.build(xSmallerOrEqual)
        v_right = Tree2D.build(xLarger)
        associatedStr = Tree(pts)
        v_val = pts[m_idx]
        return Node2D(v_left, v_right, v_val, associatedStr)
def request_device_readings_median(device_uuid):
    """
    This endpoint allows clients to GET the median sensor reading for a device.

    Mandatory Query Parameters:
    * type -> The type of sensor value a client is looking for

    Optional Query Parameters
    * start -> The epoch start time for a sensor being created
    * end -> The epoch end time for a sensor being created
    """
    if request.data:
        post_data = json.loads(request.data)
        type = post_data.get('type', None)
        if not type or type not in ('temperature', 'humidity'):
            return 'error on the required type data', 400
        start = post_data.get('start', None)
        end = post_data.get('end', None)
    else:
        return 'missing data in the request parameters', 400

    # Set the db that we want and open the connection
    if app.config['TESTING']:
        conn = sqlite3.connect('test_database.db')
    else:
        conn = sqlite3.connect('database.db')
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()

    sql = 'SELECT r.value from readings r WHERE r.type = ? AND r.device_uuid = ?'
    params = [type, device_uuid]
    if start:
        sql += ' AND r.date_created >= ?'
        params += [start]
    if end:
        sql += ' AND r.date_created <= ?'
        params += [end]
    sql += ' ORDER BY r.value'

    # Execute the query
    cur.execute(sql, params)
    rows = [row[0] for row in cur.fetchall()]

    if len(rows) == 0:
        return 'No results found', 200

    return str(median(rows)), 200
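# --- Hypothetical client call (not part of the original sources) ---
# The route and device UUID below are placeholders; only the body fields
# ('type', optional 'start'/'end') come from the endpoint documented above.
import json
import requests

resp = requests.get(
    'http://localhost:5000/devices/<device_uuid>/readings/median/',
    data=json.dumps({
        'type': 'temperature',   # required: 'temperature' or 'humidity'
        'start': 1500000000,     # optional epoch lower bound
        'end': 1600000000,       # optional epoch upper bound
    }))
print(resp.status_code, resp.text)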
def analyze_logcat(self):
    """
    __start_report12.853116__end_report

    We will parse the syntax here and build up a {name:[value,],} hash.
    Next we will compute the median value for each name. Finally we will
    report the geometric mean of all of the median values.
    """
    self.loggerdeco.debug('analyzing logcat')

    re_data = re.compile('.*__start_report([0-9\.]+)__end_report.*')

    attempt = 1
    max_time = 90  # maximum time to wait for completeness score
    wait_time = 3  # time to wait between attempts
    max_attempts = max_time / wait_time

    results = {"tcheck3": []}
    pageload_metric = {'summary': 0}
    while attempt <= max_attempts and pageload_metric['summary'] == 0:
        buf = self.logcat.get()
        for line in buf:
            match = re_data.match(line)
            if match:
                numbers = match.group(1)
                if numbers:
                    results["tcheck3"].append(float(numbers))

        if self.fennec_crashed:
            # If fennec crashed, don't bother looking for pageload metric
            break
        if pageload_metric['summary'] == 0:
            sleep(wait_time)
            attempt += 1
        if not results["tcheck3"]:
            continue

        # calculate score
        data = results["tcheck3"]
        pageload_metric["tcheck3"] = median(data)
        pageload_metric['summary'] = geometric_mean(data)

    if pageload_metric['summary'] == 0:
        self.loggerdeco.info('Unable to find pageload metric')

    self.loggerdeco.info("returning from logcat analyze with: %s" % pageload_metric)
    return pageload_metric
def get_period(kic):
    frequencies = []
    df_list = []
    filenames = utils.get_filenames(utils.BASE_PATH + str(kic), "csv")
    if len(filenames) <= 1:
        return {"period": 0.0, "fap": 0.0, "theta": 0.0, "periods": []}
    for idx, filename in enumerate(filenames):
        if (idx > 2):
            data = utils.pd.read_csv(utils.BASE_PATH + str(kic) + "/" + filename)
            try:
                freq = utils.get_freq_LS(data.TIME.to_numpy(),
                                         data.PDCSAP_FLUX.to_numpy(),
                                         data.EFPDC.to_numpy())
                frequencies.append(freq)
            except Exception as e:
                print(e)
                print(idx)
                print(kic)
            df_list.append(data)
    df = utils.pd.DataFrame()
    for _df in df_list:
        df = df.append(_df)
    t = df.TIME.to_numpy()
    y = df.FPDC.to_numpy()
    dy = df.EFPDC.to_numpy()
    period1 = utils.get_period(t, y, dy, frequencies)
    period2 = utils.get_period(t, y, dy)
    periods = [period1, period2]
    nbins = 3
    if period2 < 0.09 or period2 > 100:
        period = period1
        theta = None
    else:
        try:
            period, theta = utils.get_period_pdm(t, y, dy, periods, nbins)
        except:
            period = utils.median(periods)
            theta = None
    df = None
    data = None
    df_list = []
    return {"period": period, "theta": theta, "periods": periods}
def scanlevels(self, level, n=0):
    for k, v in level.iteritems():
        nleaves = v.nleaves
        if v:  # if there are descendants
            # XXX magic number
            # require more than X pages in a cluster
            # require some diversity in the dom path in order to create a link
            med = median((i.nleaves for i in v.itervalues()))
            if nleaves > med and nleaves > 8 * (1 + 1.0 / (n + 1)) \
                    and len(k) > 7.0 * math.exp(-n) and n >= 3:
                v.clusterable = True
                level.clusterable = False
            else:
                v.clusterable = False
            self.scanlevels(v, n + 1)
def scanlevelspath(self, level, path, n=0):
    v = level[path[0]]
    nleaves = v.nleaves if hasattr(v, "nleaves") else len(v)
    if v:  # if there are descendants
        # XXX magic number
        # require more than X pages in a cluster
        # require some diversity in the dom path in order to create a link
        med = median((i.nleaves for i in v.itervalues()))
        if nleaves > med and nleaves > 8 * (1 + 1.0 / (n + 1)) \
                and len(path[0]) > 7.0 * math.exp(-n) and n >= 3:
            v.newclusterable = True
            level.newclusterable = False
        else:
            v.newclusterable = False
        self.scanlevelspath(v, path[1:], n + 1)
    if not hasattr(level, "clusterable"):
        level.clusterable = False
def calc_median_metric(log, max_cnt_label, logscale_flag, extra_tail=0):
    metrics = []
    for e in log.orig_err_logs:
        le = [(x[0], x[1]) for x in e if x[0] <= max_cnt_label]
        length = len(le)
        if length < len(e):
            p = (max_cnt_label - le[-1][0]) / (e[length][0] - le[-1][0])
            le.append(
                (max_cnt_label, le[-1][1] + (e[length][1] - le[-1][1]) * p))
            length += 1
        le += [((max_cnt_label + 1) * (1 + extra_tail), le[-1][1])]
        res = 0
        if logscale_flag:
            for i in range(1, length + 1):
                res += 0.5 * (le[i][1] + le[i - 1][1]) * math.log(
                    (le[i][0] + 1.0) / (le[i - 1][0] + 1.0))
        else:
            for i in range(1, length + 1):
                res += 0.5 * (le[i][1] + le[i - 1][1]) * (le[i][0] - le[i - 1][0])
        metrics.append(res)
    return utils.median(metrics)
def run_test(self, exename):
###############################################################################
    self.machine_specific_init(self._scaling_exp.threads)
    self.test_specific_init(exename, self._scaling_exp.threads)

    prefix = "" if "NUMA_PREFIX" not in os.environ else "{} ".format(
        os.environ["NUMA_PREFIX"])
    cmd = "{}./{} {}".format(
        prefix, exename,
        " ".join([str(item) for item in self._scaling_exp.values(incl_threads=False)]))

    results = []
    with open("{}.perf.log".format(exename), "w") as fd:
        fd.write(cmd + "\n\n")
        fd.write("ENV: \n{}\n\n".format(run_cmd_no_fail("env")))
        for _ in range(self._num_runs):
            output = run_cmd_no_fail(cmd, verbose=not self._plot_friendly)
            fd.write(output + "\n\n")
            results.append(self.get_time(output))
            threads = self.get_threads(output)

    return median(results), threads
def finish(self):
    f = open("pos_variances.txt", "w")
    for i in range(len(self.posnames)):
        mean = utils.mean(self.counts[i])
        f.write(self.posnames[i] + "\t" + str(mean) + "\t" +
                str(utils.median(self.counts[i])) + "\t" +
                str(utils.variance(self.counts[i])) + "\t" +
                str(utils.moment(self.counts[i], mean, 3)) + "\t" +
                str(utils.moment(self.counts[i], mean, 4)) + "\t" +
                str(len([x for x in self.counts[i] if x > 0])) + "\n")
def pdf2heads(opts, args): global Verbose_flag xmltag = True highlight = False titleonly = False authonly = False Verbose_flag = False look_for_all_caps_headings = False global automatic_rerunning global Found_abstract global Found_Sammanfattning start_to_exclude = False for o, a in opts: if (o == '--noxml'): xmltag = False elif (o == '--highlight'): highlight = True if (o == '--title'): titleonly = True elif (o == '--author'): authonly = True elif (o == '--verbose'): Verbose_flag = True print "Verbose_flag is on" elif (o == '--caps'): print "looking for ABSTRACT and other headers in all caps" look_for_all_caps_headings = True if automatic_rerunning: print "looking for ABSTRACT and other headers in all caps" look_for_all_caps_headings = True tree = pdf2etree(args) # find title - look on the first page of the document at the first block of text on the page page = 1 block = 1 title_node = None while True: try: trial_title_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format( page, block))[0] if Verbose_flag: print "trial_title_node:" print trial_title_node # title_headers = trial_title_node.xpath(".//TOKEN[@font-size > {0}]".format(23)) # note that the Title is assumed to be 20 points or larger in size title_headers = trial_title_node.xpath( ".//TOKEN[@font-size > {0}]".format(20)) if Verbose_flag: print "title_headers:" print title_headers title_head_txt = ' '.join([ etree.tostring(el, method='text', encoding="UTF-8") for el in title_headers ]) if len(title_head_txt): print "<Title>" + title_head_txt + "</Title>" title_node = trial_title_node next_block = block + 1 break except IndexError: page += 1 else: break if page > 2: # probably not going to find it now break # find subtitle - note that a subtitle is option - start on the 2nd page and second block on the page page = 2 block = 2 next_block = 2 subtitle_node = None while True: try: trial_subtitle_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format( page, block))[0] if Verbose_flag: print "trial_subtitle_node:" print trial_subtitle_node # the Subtitle is assumed to be larger than 19 points subtitle_headers = trial_subtitle_node.xpath( ".//TOKEN[@font-size > {0}]".format(19)) if Verbose_flag: print "subtitle_headers:" print subtitle_headers if len(subtitle_headers) == 0: next_block = 2 break subtitle_head_txt = ' '.join([ etree.tostring(el, method='text', encoding="UTF-8") for el in subtitle_headers ]) if len(subtitle_head_txt): subtitle_node = trial_subtitle_node print "<Subtitle>" + title_head_txt + "</Subtitle>" next_block = 3 break except IndexError: block += 1 else: break if block > 4: # probably not going to find it now break # find author - on inside cover page = 2 block = next_block auth_node = None while True: try: trial_auth_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format( page, block))[0] if Verbose_flag: print "trial_auth_node:" print trial_auth_node # the author's name(s) is(are) assumed to be 15 points or larger in size auth_headers = trial_auth_node.xpath( ".//TOKEN[@font-size > {0}]".format(15)) if Verbose_flag: print "auth_headers:" print auth_headers auth_head_txt = ' '.join([ etree.tostring(el, method='text', encoding="UTF-8") for el in auth_headers ]) if len(title_head_txt): auth_node = trial_auth_node break except IndexError: block += 1 else: break if block > 4: # probably not going to find it now break font_sizes = tree.xpath('//TOKEN/@font-size') mean_font_size = mean(font_sizes) median_font_size = median(font_sizes) # print "Median Font Size (i.e. 
body text):", median_font_size font_colors = tree.xpath('//TOKEN/@font-color') font_color_hash = {} for fc in font_colors: try: font_color_hash[fc] += 1 except KeyError: font_color_hash[fc] = 1 sortlist = [(v, k) for k, v in font_color_hash.iteritems()] sortlist.sort(reverse=True) main_font_color = sortlist[0][1] head_txts = [] stop = False page = 0 Found_abstract = False Found_Sammanfattning = False for page_node in tree.xpath('//PAGE'): page = page + 1 block_number = 0 for block_node in page_node.xpath('.//BLOCK'): block_number = block_number + 1 if xmltag: if block_node == title_node: st = "<title>" et = "</title>" if block_node == subtitle_node: st = "<subtitle>" et = "</subtitle>" elif block_node == auth_node: st = "<author>" et = "</author>" else: st = "<heading>" et = "</heading>" if highlight: st = "\033[0;32m{0}\033[0m".format(st) et = "\033[0;32m{0}\033[0m".format(et) else: st = et = "" if block_node == title_node and authonly: continue # note that the assumption that the Abstract headings is set in a larger font then the median font sized used on a page, will not find # abstracts of Aalto university - as they set the word ABSTRACT in a slightly larger size font as used for the rest of the text, but they do set it in all CAPs if look_for_all_caps_headings: headers = block_node.xpath( ".//TOKEN[@font-size > {0} or @bold = 'yes' or @font-color != '{1}']" .format(mean_font_size, main_font_color)) else: headers = block_node.xpath( ".//TOKEN[@font-size > {0} or @bold = 'yes' or @font-color != '{1}']" .format(mean_font_size * 1.05, main_font_color)) head_txt = ' '.join([ etree.tostring(el, method='text', encoding="UTF-8") for el in headers ]) if head_txt in text_start_to_exclude: start_to_exclude = True head_txt = filter_headings(head_txt) if len(head_txt) and (not start_to_exclude): head_txts.append("{0}{1}{2}".format(st, head_txt, et)) if head_txt.find("Abstract") >= 0 or head_txt.find( "ABSTRACT") >= 0: if not Found_abstract: print "Abstract (en):" output_blocks_on_page(page_node, block_number, page) Found_abstract = True break if head_txt.find("Sammanfattning") >= 0 or head_txt.find( "SAMMANFATTNING") >= 0: if not Found_Sammanfattning: print "Sammanfattning (sv):" output_blocks_on_page(page_node, block_number, page) Found_Sammanfattning = True break if head_txt.find("Abstrakt") >= 0 or head_txt.find( "ABSTRAKT") >= 0: if not Found_Sammanfattning: print "Abstrakt (sv):" output_blocks_on_page(page_node, block_number, page) Found_Sammanfattning = True break if head_txt.find("Referat") >= 0 or head_txt.find("REFERAT") >= 0: if not Found_Sammanfattning: print "Referat (sv):" output_blocks_on_page(page_node, block_number, page) Found_Sammanfattning = True break # # if head_txt.find("Abstracto(sp)") >= 0: # print "Abstracto (sp):" # output_blocks_on_page(page_node, block_number, page) # break # # if head_txt.find("Abstrait (fr)") >= 0: # print "Abstrait (fr):" # output_blocks_on_page(page_node, block_number, page) # break if block_node == title_node and titleonly: stop = True break elif block_node == auth_node and authonly: stop = True break if stop: break for txt in head_txts: sys.stdout.writelines([txt, '\n'])
def run_job(self): is_test_completed = False if not self.install_local_pages(): self.add_failure( self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Aborting test - Could not install local pages on phone.', TreeherderStatus.EXCEPTION) return is_test_completed if not self.create_profile(): self.add_failure(self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Aborting test - Could not run Fennec.', TreeherderStatus.BUSTED) return is_test_completed perfherder_options = PerfherderOptions(self.perfherder_options, repo=self.build.tree) is_test_completed = True testcount = len(self._urls.keys()) for testnum, (testname, url) in enumerate(self._urls.iteritems(), 1): self.loggerdeco = self.loggerdeco.clone( extradict={ 'phoneid': self.phone.id, 'buildid': self.build.id, 'testname': testname }, extraformat= 'S1S2TestJob|%(phoneid)s|%(buildid)s|%(testname)s|%(message)s') self.dm._logger = self.loggerdeco self.loggerdeco.info('Running test (%d/%d) for %d iterations', testnum, testcount, self._iterations) command = None for attempt in range(1, self.stderrp_attempts + 1): # dataset is a list of the measurements made for the # iterations for this test. # # An empty item in the dataset list represents a # failure to obtain any measurement for that # iteration. # # It is possible for an item in the dataset to have an # uncached value and not have a corresponding cached # value if the cached test failed to record the # values. iteration = 0 dataset = [] for iteration in range(1, self._iterations + 1): # Calling svc power stayon true will turn on the # display for at least some devices if it has # turned off. self.dm.power_on() command = self.worker_subprocess.process_autophone_cmd( test=self, require_ip_address=url.startswith('http')) if command['interrupt']: self.handle_test_interrupt(command['reason'], command['test_result']) break self.update_status(message='Attempt %d/%d for Test %d/%d, ' 'run %d, for url %s' % (attempt, self.stderrp_attempts, testnum, testcount, iteration, url)) if not self.create_profile(): self.add_failure(self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Failed to create profile', TreeherderStatus.TESTFAILED) continue measurement = self.runtest(url) if not measurement: self.loggerdeco.warning( '%s %s Attempt %s Failed to get uncached measurement.', testname, url, attempt) continue self.add_pass(url) dataset.append({'uncached': measurement}) measurement = self.runtest(url) if not measurement: self.loggerdeco.warning( '%s %s Attempt %s Failed to get cached measurement.', testname, url, attempt) continue self.add_pass(url) dataset[-1]['cached'] = measurement if self.is_stderr_below_threshold( ('throbberstart', 'throbberstop'), dataset, self.stderrp_accept): self.loggerdeco.info( 'Accepted test (%d/%d) after %d of %d iterations', testnum, testcount, iteration, self._iterations) break if command and command['interrupt']: break measurements = len(dataset) if measurements > 0 and self._iterations != measurements: self.add_failure(self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Failed to get all measurements', TreeherderStatus.TESTFAILED) elif measurements == 0: # If we have not gotten a single measurement at this point, # just bail and report the failure rather than wasting time # continuing more attempts. 
self.add_failure(self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'No measurements detected.', TreeherderStatus.BUSTED) self.loggerdeco.info( 'Failed to get measurements for test %s after %d/%d attempt ' 'of %d iterations', testname, attempt, self.stderrp_attempts, self._iterations) self.worker_subprocess.mailer.send( '%s %s failed for Build %s %s on %s %s' % (self.__class__.__name__, testname, self.build.tree, self.build.id, utils.host(), self.phone.id), 'No measurements were detected for test %s.\n\n' 'Job %s\n' 'Host %s\n' 'Phone %s\n' 'Repository %s\n' 'Build %s\n' 'Revision %s\n' % (testname, self.job_url, utils.host(), self.phone.id, self.build.tree, self.build.id, self.build.changeset)) break if self.is_stderr_below_threshold( ('throbberstart', 'throbberstop'), dataset, self.stderrp_reject): rejected = False else: rejected = True self.loggerdeco.info( 'Rejected test (%d/%d) after %d/%d iterations', testnum, testcount, iteration, self._iterations) self.loggerdeco.debug('publishing results') perfherder_values = {'geometric_mean': 0} metric_keys = ['throbberstart', 'throbberstop', 'throbbertime'] cache_names = {'uncached': 'first', 'cached': 'second'} cache_keys = cache_names.keys() for metric_key in metric_keys: perfherder_values[metric_key] = {'geometric_mean': 0} for cache_key in cache_keys: perfherder_values[metric_key][cache_key] = { 'median': 0, 'values': [] } for datapoint in dataset: for cache_key in datapoint: starttime = datapoint[cache_key]['starttime'] throbberstart = datapoint[cache_key]['throbberstart'] throbberstop = datapoint[cache_key]['throbberstop'] self.report_results( starttime=starttime, tstrt=throbberstart, tstop=throbberstop, testname=testname, cache_enabled=(cache_key == 'cached'), rejected=rejected) perfherder_values['throbberstart'][cache_key][ 'values'].append(throbberstart - starttime) perfherder_values['throbberstop'][cache_key][ 'values'].append(throbberstop - starttime) perfherder_values['throbbertime'][cache_key][ 'values'].append(throbberstop - throbberstart) test_values = [] for metric_key in metric_keys: for cache_key in cache_keys: perfherder_values[metric_key][cache_key][ 'median'] = utils.median( perfherder_values[metric_key][cache_key] ['values']) perfherder_values[metric_key][ 'geometric_mean'] = utils.geometric_mean([ perfherder_values[metric_key]['uncached'] ['median'], perfherder_values[metric_key]['cached']['median'] ]) test_values.append( perfherder_values[metric_key]['geometric_mean']) perfherder_suite = PerfherderSuite( name=testname, value=utils.geometric_mean(test_values), options=perfherder_options) for metric_key in metric_keys: for cache_key in cache_keys: cache_name = cache_names[cache_key] subtest_name = "%s %s" % (metric_key, cache_name) perfherder_suite.add_subtest( subtest_name, perfherder_values[metric_key][cache_key]['median'], options=perfherder_options) self.perfherder_artifact = PerfherderArtifact() self.perfherder_artifact.add_suite(perfherder_suite) self.loggerdeco.debug("PerfherderArtifact: %s", self.perfherder_artifact) if not rejected: break if command and command['interrupt']: break return is_test_completed
seasoned_snr = snr(image, seasoned_image)
print("Salt and pepper SNR: " + str(seasoned_snr) + " dB")

# Generate Gaussian noise
gaussed_image = random_noise(image, mode='gaussian', seed=0)
gaussed_snr = snr(image, gaussed_image)
print("Gaussian SNR: " + str(gaussed_snr) + " dB")

# Apply an averaging filter over image
# 5x5 averaging filter kernel (low pass)
avg_kernel = np.ones((5, 5)) / 25.0
averaged_simage = conv(seasoned_image, avg_kernel)
averaged_gimage = conv(gaussed_image, avg_kernel)

# Apply a median filter over image
median_simage = median(seasoned_image, 5)
median_gimage = median(gaussed_image, 5)

# Sobel edge detection filters
sx_kernel = [[-1, 0, 1],
             [-2, 0, 2],
             [-1, 0, 1]]
sy_kernel = [[1, 2, 1],
             [0, 0, 0],
             [-1, -2, -1]]

# Sobel edge filter on noisy images
sx = np.abs(conv(seasoned_image, sx_kernel))
sx = threshold(sx, 0.3)
sy = np.abs(conv(seasoned_image, sy_kernel))
sy = threshold(sy, 0.3)
sobel_simage = sx + sy

sx = np.abs(conv(gaussed_image, sx_kernel))
sx = threshold(sx, 0.3)
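# --- Illustrative sketch (not part of the original sources) ---
# snr() is not shown above; one common definition is the ratio of signal
# power to noise power in decibels. The original helper may differ.
import numpy as np

def snr(reference, noisy):
    """Signal-to-noise ratio in dB: 10*log10(signal power / noise power)."""
    reference = reference.astype(np.float64)
    noise = noisy.astype(np.float64) - reference
    signal_power = np.mean(reference ** 2)
    noise_power = np.mean(noise ** 2)
    return 10.0 * np.log10(signal_power / noise_power)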
def analyze_logcat(self): """ I/GeckoDump( 2284): __start_tp_report I/GeckoDump( 2284): _x_x_mozilla_page_load I/GeckoDump( 2284): _x_x_mozilla_page_load_details I/GeckoDump( 2284): |i|pagename|runs| I/GeckoDump( 2284): |0;amazon.com/www.amazon.com/index.html;2386;1146 I/GeckoDump( 2284): |1;m.yahoo.co.jp/www.yahoo.co.jp/index.html;1724;901 I/GeckoDump( 2284): |2;m.accuweather.com/www.accuweather.com/index.html;228;231 I/GeckoDump( 2284): |3;m.yandex.ru/www.yandex.ru/index.html;6043;2984 I/GeckoDump( 2284): |4;m.wikipedia.com/en.m.wikipedia.org/index.html;734;385 I/GeckoDump( 2284): |5;m.espn.com/m.espn.go.com/index.html;576;419 I/GeckoDump( 2284): |6;m.bbc.co.uk/www.bbc.co.uk/mobile/index.html;349;229 I/GeckoDump( 2284): __end_tp_report I/GeckoDump( 2284): __start_cc_report I/GeckoDump( 2284): _x_x_mozilla_cycle_collect,3390 I/GeckoDump( 2284): __end_cc_report I/GeckoDump( 2284): __startTimestamp1433438438092__endTimestamp We will parse the syntax here and build up a {name:[value,],} hash. Next we will compute the median value for each name. Finally we will report the geoemtric mean of all of the median values. """ self.loggerdeco.debug('analyzing logcat') re_page_data = re.compile('.*\|[0-9];([a-zA-Z0-9\.\/\-]+);([0-9;]+).*') re_end_report = re.compile('.*__end_tp_report.*') attempt = 1 max_time = 90 # maximum time to wait for completeness score wait_time = 3 # time to wait between attempts max_attempts = max_time / wait_time results = {} pageload_metric = {'summary': 0} while attempt <= max_attempts and pageload_metric['summary'] == 0: buf = self.logcat.get() for line in buf: self.loggerdeco.debug('analyze_logcat: %s' % line) if re_end_report.match(line): # calculate score data = [] for page in results: data.append(median(results[page])) pageload_metric[page] = median(results[page]) pageload_metric['summary'] = geometric_mean(data) break match = re_page_data.match(line) if match: page_name = match.group(1) numbers = match.group(2) if page_name and numbers: page_name = page_name.split('/')[0] numbers = [float(x) for x in numbers.split(';')] results[page_name] = numbers if self.fennec_crashed: # If fennec crashed, don't bother looking for pageload metric break if pageload_metric['summary'] == 0: sleep(wait_time) attempt += 1 if pageload_metric['summary'] == 0: self.loggerdeco.warning('Unable to find pageload metric') return pageload_metric
def __do_divide_conquer(self, P): if len(P) <= 3: return self.__do_graham_scan(P) # divide m = utils.median([p.X for p in P]) PL = [p for p in P if p.X <= m] PR = [p for p in P if p.X > m] QL = self.__do_divide_conquer(PL) QR = self.__do_divide_conquer(PR) # calculate polar angle i = np.argmin([p.Y for p in QL]) j = np.argmin([p.Y for p in QR]) if QL[i].Y > QR[j].Y: tmp = QL QL = QR QR = tmp i = j X = Point(QL[i].X + 1, QL[i].Y) O = QL[i] QL_pa = utils.calc_polar_angle(O, X, QL) QR_pa = utils.calc_polar_angle(O, X, QR) s = np.argmin(QR_pa) # min polar angle in QR t = np.argmax(QR_pa) # max polar angle in QR # merge QL = np.concatenate((QL[i:], QL[:i])) # arrange in ascending polar angle order QL_pa = np.concatenate((QL_pa[i:], QL_pa[:i])) if s < t: QR_1 = QR[s:t] QR_2 = np.concatenate((QR[t:], QR[:s]))[::-1] QR_pa_1 = QR_pa[s:t] QR_pa_2 = np.concatenate((QR_pa[t:], QR_pa[:s]))[::-1] else: QR_1 = np.concatenate((QR[s:], QR[:t])) QR_2 = QR[t:s][::-1] QR_pa_1 = np.concatenate((QR_pa[s:], QR_pa[:t])) QR_pa_2 = QR_pa[t:s][::-1] l_len = len(QL) r_len_1 = len(QR_1) r_len_2 = len(QR_2) l_it = 0 r_it_1 = 0 r_it_2 = 0 W = list() while True: if l_it >= l_len: if r_it_1 >= r_len_1: # extend r_2 W.extend(QR_2[r_it_2:]) r_it_2 = r_len_2 break elif r_it_2 >= r_len_2: # extend r_1 W.extend(QR_1[r_it_1:]) r_it_1 = r_len_1 break else: # append r_1 and r_2 if QR_pa_1[r_it_1] < QR_pa_2[r_it_2]: W.append(QR_1[r_it_1]) r_it_1 += 1 else: W.append(QR_2[r_it_2]) r_it_2 += 1 elif r_it_1 >= r_len_1: if r_it_2 >= r_len_2: # extend l W.extend(QL[l_it:]) l_it = l_len break else: # append l and r_2 if QL_pa[l_it] < QR_pa_2[r_it_2]: W.append(QL[l_it]) l_it += 1 else: W.append(QR_2[r_it_2]) r_it_2 += 1 elif r_it_2 >= r_len_2: if QL_pa[l_it] < QR_pa_1[r_it_1]: W.append(QL[l_it]) l_it += 1 else: W.append(QR_1[r_it_1]) r_it_1 += 1 else: # append l, r_1 and r_2 if QL_pa[l_it] < QR_pa_1[r_it_1] and QL_pa[l_it] < QR_pa_2[r_it_2]: W.append(QL[l_it]) l_it += 1 elif QR_pa_1[r_it_1] < QL_pa[l_it] and QR_pa_1[r_it_1] < QR_pa_2[r_it_2]: W.append(QR_1[r_it_1]) r_it_1 += 1 else: W.append(QR_2[r_it_2]) r_it_2 += 1 return self.__do_graham_scan(W, sort=False)
def pdf2heads(opts, args): xmltag = True highlight = False titleonly = False authonly = False for o, a in opts: if (o == '--noxml'): xmltag = False elif (o == '--highlight'): highlight = True if (o == '--title'): titleonly = True elif (o == '--author'): authonly = True tree = pdf2etree(args) # find title page = 1 block = 1 title_node = None while True: try: title_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0] except IndexError: page+=1 else: break if page > 2: # probably not going to find it now break # find author page = 1 block = 2 auth_node = None while True: try: auth_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0] except InbdexError: block+=1 else: break if block > 4: # probably not going to find it now break font_sizes = tree.xpath('//TOKEN/@font-size') mean_font_size = mean(font_sizes) median_font_size = median(font_sizes) #print "Median Font Size (i.e. body text):", median_font_size font_colors = tree.xpath('//TOKEN/@font-color') font_color_hash = {} for fc in font_colors: try: font_color_hash[fc]+=1 except KeyError: font_color_hash[fc] = 1 sortlist = [(v,k) for k,v in font_color_hash.iteritems()] sortlist.sort(reverse=True) main_font_color = sortlist[0][1] head_txts = [] stop = False for page_node in tree.xpath('//PAGE'): for block_node in page_node.xpath('.//BLOCK'): if xmltag: if block_node == title_node: st = "<title>" et = "</title>" elif block_node == auth_node: st = "<author>" et = "</author>" else: st = "<heading>" et = "</heading>" if highlight: st = "\033[0;32m{0}\033[0m".format(st) et = "\033[0;32m{0}\033[0m".format(et) else: st = et = "" if block_node == title_node and authonly: continue headers = block_node.xpath(".//TOKEN[@font-size > {0} or @bold = 'yes' or @font-color != '{1}']".format(mean_font_size*1.05, main_font_color)) head_txt = ' '.join([etree.tostring(el, method='text', encoding="UTF-8") for el in headers]) if len(head_txt): head_txts.append("{0}{1}{2}".format(st, head_txt, et)) if block_node == title_node and titleonly: stop = True break elif block_node == auth_node and authonly: stop = True break if stop: break for txt in head_txts: sys.stdout.writelines([txt, '\n']) def main(argv=None): if argv is None: argv = sys.argv[1:] try: try: opts, args = getopt.getopt(argv, "ht", ["help", "test", "noxml", "highlight", "title", "author"]) except getopt.error as msg: raise UsageError(msg) for o, a in opts: if (o in ['-h', '--help']): # print help and exit sys.stdout.write(__doc__) sys.stdout.flush() return 0 pdf2heads(opts, args) except UsageError as err: print >>sys.stderr, err.msg print >>sys.stderr, "for help use --help" return 2 except ConfigError, err: sys.stderr.writelines([str(err.msg),'\n']) sys.stderr.flush() return 1
def analyze_logcat(self): """ I/GeckoDump( 2284): __start_tp_report I/GeckoDump( 2284): _x_x_mozilla_page_load I/GeckoDump( 2284): _x_x_mozilla_page_load_details I/GeckoDump( 2284): |i|pagename|runs| I/GeckoDump( 2284): |0;amazon.com/www.amazon.com/index.html;2386;1146 I/GeckoDump( 2284): |1;m.yahoo.co.jp/www.yahoo.co.jp/index.html;1724;901 I/GeckoDump( 2284): |2;m.accuweather.com/www.accuweather.com/index.html;228;231 I/GeckoDump( 2284): |3;m.yandex.ru/www.yandex.ru/index.html;6043;2984 I/GeckoDump( 2284): |4;m.wikipedia.com/en.m.wikipedia.org/index.html;734;385 I/GeckoDump( 2284): |5;m.espn.com/m.espn.go.com/index.html;576;419 I/GeckoDump( 2284): |6;m.bbc.co.uk/www.bbc.co.uk/mobile/index.html;349;229 I/GeckoDump( 2284): __end_tp_report I/GeckoDump( 2284): __start_cc_report I/GeckoDump( 2284): _x_x_mozilla_cycle_collect,3390 I/GeckoDump( 2284): __end_cc_report I/GeckoDump( 2284): __startTimestamp1433438438092__endTimestamp We will parse the syntax here and build up a {name:[value,],} hash. Next we will compute the median value for each name. Finally we will report the geoemtric mean of all of the median values. """ self.loggerdeco.debug('analyzing logcat') re_page_data = re.compile( r'.*\|[0-9];([a-zA-Z0-9\.\/\-]+);([0-9;]+).*') re_end_report = re.compile(r'.*__end_tp_report.*') attempt = 1 max_time = 180 # maximum time to wait for tp report wait_time = 3 # time to wait between attempts max_attempts = max_time / wait_time results = {} pageload_metric = {'summary': 0} while attempt <= max_attempts and pageload_metric['summary'] == 0: buf = self.worker_subprocess.logcat.get() for line in buf: self.loggerdeco.debug('analyze_logcat: %s', line) if re_end_report.match(line): # calculate score data = [] for page in results: data.append(median(results[page])) # median of each page, ignoring the first run pageload_metric[page] = median(results[page][1:]) pageload_metric['summary'] = geometric_mean(data) break match = re_page_data.match(line) if match: page_name = match.group(1) numbers = match.group(2) if page_name and numbers: page_name = page_name.split('/')[0] numbers = [float(x) for x in numbers.split(';')] results[page_name] = numbers if self.handle_crashes(): # If fennec crashed, don't bother looking for pageload metric break if pageload_metric['summary'] == 0: sleep(wait_time) attempt += 1 if pageload_metric['summary'] == 0: self.loggerdeco.warning('Unable to find pageload metric') return pageload_metric
def run_job(self): is_test_completed = False if not self.install_local_pages(): self.add_failure( self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Aborting test - Could not install local pages on phone.', TreeherderStatus.EXCEPTION) return is_test_completed if not self.create_profile(): self.add_failure( self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Aborting test - Could not run Fennec.', TreeherderStatus.BUSTED) return is_test_completed perfherder_options = PerfherderOptions(self.perfherder_options, repo=self.build.tree) is_test_completed = True testcount = len(self._urls.keys()) for testnum, (testname, url) in enumerate(self._urls.iteritems(), 1): self.loggerdeco = self.loggerdeco.clone( extradict={ 'repo': self.build.tree, 'buildid': self.build.id, 'buildtype': self.build.type, 'sdk': self.phone.sdk, 'platform': self.build.platform, 'testname': testname }, extraformat='S1S2TestJob %(repo)s %(buildid)s %(buildtype)s %(sdk)s %(platform)s %(testname)s %(message)s') self.dm._logger = self.loggerdeco self.loggerdeco.info('Running test (%d/%d) for %d iterations', testnum, testcount, self._iterations) command = None for attempt in range(1, self.stderrp_attempts+1): # dataset is a list of the measurements made for the # iterations for this test. # # An empty item in the dataset list represents a # failure to obtain any measurement for that # iteration. # # It is possible for an item in the dataset to have an # uncached value and not have a corresponding cached # value if the cached test failed to record the # values. iteration = 0 dataset = [] for iteration in range(1, self._iterations+1): # Calling svc power stayon true will turn on the # display for at least some devices if it has # turned off. self.dm.power_on() command = self.worker_subprocess.process_autophone_cmd( test=self, require_ip_address=url.startswith('http')) if command['interrupt']: self.handle_test_interrupt(command['reason'], command['test_result']) break self.update_status(message='Attempt %d/%d for Test %d/%d, ' 'run %d, for url %s' % (attempt, self.stderrp_attempts, testnum, testcount, iteration, url)) if not self.create_profile(): self.add_failure( self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Failed to create profile', TreeherderStatus.TESTFAILED) continue measurement = self.runtest(url) if not measurement: self.loggerdeco.warning( '%s %s Attempt %s Failed to get uncached measurement.', testname, url, attempt) continue self.add_pass(url, text='uncached') dataset.append({'uncached': measurement}) measurement = self.runtest(url) if not measurement: self.loggerdeco.warning( '%s %s Attempt %s Failed to get cached measurement.', testname, url, attempt) continue self.add_pass(url, text='cached') dataset[-1]['cached'] = measurement if self.is_stderr_below_threshold( ('throbberstart', 'throbberstop'), dataset, self.stderrp_accept): self.loggerdeco.info( 'Accepted test (%d/%d) after %d of %d iterations', testnum, testcount, iteration, self._iterations) break if command and command['interrupt']: break measurements = len(dataset) if measurements > 0 and self._iterations != measurements: self.add_failure( self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'Failed to get all measurements', TreeherderStatus.TESTFAILED) elif measurements == 0: # If we have not gotten a single measurement at this point, # just bail and report the failure rather than wasting time # continuing more attempts. 
self.add_failure( self.name, TestStatus.TEST_UNEXPECTED_FAIL, 'No measurements detected.', TreeherderStatus.BUSTED) self.loggerdeco.info( 'Failed to get measurements for test %s after %d/%d attempt ' 'of %d iterations', testname, attempt, self.stderrp_attempts, self._iterations) self.worker_subprocess.mailer.send( '%s %s failed for Build %s %s on %s %s' % (self.__class__.__name__, testname, self.build.tree, self.build.id, utils.host(), self.phone.id), 'No measurements were detected for test %s.\n\n' 'Job %s\n' 'Host %s\n' 'Phone %s\n' 'Repository %s\n' 'Build %s\n' 'Revision %s\n' % (testname, self.job_url, utils.host(), self.phone.id, self.build.tree, self.build.id, self.build.changeset)) break if self.is_stderr_below_threshold( ('throbberstart', 'throbberstop'), dataset, self.stderrp_reject): rejected = False else: rejected = True self.loggerdeco.info( 'Rejected test (%d/%d) after %d/%d iterations', testnum, testcount, iteration, self._iterations) self.loggerdeco.debug('publishing results') perfherder_values = {'geometric_mean': 0} metric_keys = ['throbberstart', 'throbberstop', 'throbbertime'] cache_names = {'uncached': 'first', 'cached': 'second'} cache_keys = cache_names.keys() for metric_key in metric_keys: perfherder_values[metric_key] = {'geometric_mean': 0} for cache_key in cache_keys: perfherder_values[metric_key][cache_key] = {'median': 0, 'values': []} for datapoint in dataset: for cache_key in datapoint: starttime = datapoint[cache_key]['starttime'] throbberstart = datapoint[cache_key]['throbberstart'] throbberstop = datapoint[cache_key]['throbberstop'] self.report_results( starttime=starttime, tstrt=throbberstart, tstop=throbberstop, testname=testname, cache_enabled=(cache_key == 'cached'), rejected=rejected) perfherder_values['throbberstart'][cache_key]['values'].append( throbberstart - starttime) perfherder_values['throbberstop'][cache_key]['values'].append( throbberstop - starttime) perfherder_values['throbbertime'][cache_key]['values'].append( throbberstop - throbberstart) test_values = [] for metric_key in metric_keys: for cache_key in cache_keys: perfherder_values[metric_key][cache_key]['median'] = utils.median( perfherder_values[metric_key][cache_key]['values']) perfherder_values[metric_key]['geometric_mean'] = utils.geometric_mean( [perfherder_values[metric_key]['uncached']['median'], perfherder_values[metric_key]['cached']['median']]) test_values.append(perfherder_values[metric_key]['geometric_mean']) perfherder_suite = PerfherderSuite(name=testname, value=utils.geometric_mean(test_values), options=perfherder_options) for metric_key in metric_keys: for cache_key in cache_keys: cache_name = cache_names[cache_key] subtest_name = "%s %s" % (metric_key, cache_name) perfherder_suite.add_subtest( subtest_name, perfherder_values[metric_key][cache_key]['median'], options=perfherder_options) self.perfherder_artifact = PerfherderArtifact() self.perfherder_artifact.add_suite(perfherder_suite) self.loggerdeco.debug("PerfherderArtifact: %s", self.perfherder_artifact) if not rejected: break if command and command['interrupt']: break return is_test_completed
def extract_dataframes(): for pid in pids: print () print ('pid: ', pid) tac_reading = pd.read_csv('clean_tac/' + pid + '_clean_TAC.csv') acc_data = pd.read_csv('accelerometer/accelerometer_' + pid + '.csv') tac_labels = [] for feat_no, feature in enumerate(features): print (' feature:', feature) array_long = [] for ind, row in tac_reading.iterrows(): if ind!=0: t1, t2 = prev_row['timestamp'], row['timestamp'] long_data = acc_data[ (acc_data['time']/1000 >= t1) & (acc_data['time']/1000 < t2) ] if not long_data.empty: if feat_no==0: if prev_row['TAC_Reading'] >= 0.08: tac_labels.append(1) else: tac_labels.append(0) if feature=='rms': lt = [] for axis in ['x', 'y', 'z']: lt.append(utils.rms(long_data[axis])) lt = np.array(lt) array_long.append(lt) else: short_datas = np.array_split(long_data, 300) # stores the features for every 1 second in 10 second segment array_short = [] for short_seg, short_data in enumerate(short_datas): # data_short = data_long[data_long['short_segment']==short_seg] lt = [] for axis in ['x', 'y', 'z']: data_axis = np.array(short_data[axis]) if feature=='mean': lt.append(utils.mean_feature(data_axis)) elif feature=='std': lt.append(utils.std(data_axis)) elif feature=='median': lt.append(utils.median(data_axis)) elif feature=='crossing_rate': lt.append(utils.crossing_rate(data_axis)) elif feature=='max_abs': lt.append(utils.max_abs(data_axis)) elif feature=='min_abs': lt.append(utils.min_abs(data_axis)) elif feature=='max_raw': lt.append(utils.max_raw(data_axis)) elif feature=='min_raw': lt.append(utils.min_raw(data_axis)) elif feature=='spec_entrp_freq': lt.append(utils.spectral_entropy_freq(data_axis)) elif feature=='spec_entrp_time': lt.append(utils.spectral_entropy_time(data_axis)) elif feature=='spec_centroid': lt.append(utils.spectral_centroid(data_axis)) elif feature=='spec_spread': lt.append(utils.spectral_spread(data_axis)) elif feature=='spec_rolloff': lt.append(utils.spectral_rolloff(data_axis)) elif feature=='max_freq': lt.append(utils.max_freq(data_axis)) elif feature=='spec_flux': if short_seg==0: lt.append(utils.spectral_flux(data_axis, np.zeros(len(data_axis)))) if axis=='x': x = data_axis elif axis=='y': y = data_axis elif axis=='z': z = data_axis else: if axis=='x': if len(data_axis) > len(x): zeros = np.zeros(len(data_axis) - len(x)) x = np.append(x, zeros) elif len(data_axis) < len(x): zeros = np.zeros(len(x) - len(data_axis)) data_axis = np.append(data_axis, zeros) lt.append(utils.spectral_flux(data_axis, x)) elif axis=='y': if len(data_axis) > len(y): zeros = np.zeros(len(data_axis) - len(y)) y = np.append(y, zeros) elif len(data_axis) < len(y): zeros = np.zeros(len(y) - len(data_axis)) data_axis = np.append(data_axis, zeros) lt.append(utils.spectral_flux(data_axis, y)) elif axis=='z': if len(data_axis) > len(z): zeros = np.zeros(len(data_axis) - len(z)) z = np.append(z, zeros) elif len(data_axis) < len(z): zeros = np.zeros(len(z) - len(data_axis)) data_axis = np.append(data_axis, zeros) lt.append(utils.spectral_flux(data_axis, z)) array_short.append(np.array(lt)) short_metric = np.array(array_short) array_long.append(short_metric) prev_row = row if feature=='rms': df = pd.DataFrame(columns=['Rms_x', 'Rms_y', 'Rms_z']) long_metric = np.array(array_long) df['Rms_x'] = long_metric[:,0:1].flatten() df['Rms_y'] = long_metric[:,1:2].flatten() df['Rms_z'] = long_metric[:,2:].flatten() df.to_csv('features/' + feature + '_feature.csv', index=False) else: long_metric = np.array(array_long) summary_stats(long_metric, feature, pid) print (' tac_labels: ', 
len(tac_labels)) rename_column_and_concat(pid, tac_labels)
def pdf2heads(opts, args, document):
    global Verbose_flag
    global test_flag
    xmltag = True
    highlight = False
    titleonly = False
    authonly = False
    Verbose_flag = False
    test_flag = False
    global look_for_all_caps_headings
    look_for_all_caps_headings = False
    global automatic_rerunning
    global Found_Heading
    global Found_abstract
    global Found_org
    global Found_key
    global Found_Author
    global Found_Level
    global Found_Sammanfattning
    global Found_Method
    global Found_Introduction
    global Found_TOC
    global abstractOut_path
    global OrgandSup_path
    global referat_path
    global methodOut_path
    global introductionOut_path
    global toc_path
    global heading_path
    global title_path
    global author_path
    global subtitle_path
    global end_tag
    global tree
    global mean_font_size
    global main_font_color
    global document_type
    global author
    author = ""
    document_type = document
    start_to_exclude = False

    for o, a in opts:
        if (o == '--noxml'):
            xmltag = False
        elif (o == '--highlight'):
            highlight = True
        if (o == '--title'):
            titleonly = True
        elif (o == '--author'):
            authonly = True
        elif (o == '--unittest'):
            test_flag = True
        elif (o == '--verbose'):
            Verbose_flag = True
            print "Verbose_flag is on"
        elif (o == '--caps'):
            print "looking for ABSTRACT and other headers in all caps"
            look_for_all_caps_headings = True
    if automatic_rerunning:
        print "looking for ABSTRACT and other headers in all caps"
        look_for_all_caps_headings = True

    tree = pdf2etree(args)

    global title_head_txt
    # find title - look on the first page of the document at the first block of text on the page
    page = 1
    block = 1
    title_node = None
    while (page < 2):
        try:
            trial_title_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0]
            if Verbose_flag:
                print "trial_title_node:"
                print trial_title_node
            # title_headers = trial_title_node.xpath(".//TOKEN[@font-size > {0}]".format(23))
            # the title is assumed to be bold and set larger than 20 (or at least 15) points
            title_headers = trial_title_node.xpath(
                ".//TOKEN[(@font-size > {0} and @bold = 'yes') or (@font-size > {1} and @bold = 'yes')]"
                .format(20, 15))
            if Verbose_flag:
                print "title_headers:"
                print title_headers
            title_head_txt = ' '.join([
                etree.tostring(el, method='text', encoding="UTF-8")
                for el in title_headers
            ])
            if len(title_head_txt):  # success: title found
                print "Title: found"
                title_path = '../../../../output/parse_result/' + directiory + '/title.txt'
                txt = title_head_txt
                st = 'title'
                json_append(st, txt)
                # with open(title_path, 'w') as f:
                #     print >> f, txt, "\n"  # print tag information to the title file
                title_node = trial_title_node
                next_block = block + 1
                break
            block = block + 1
        except IndexError:
            page += 1

    # find subtitle - a subtitle is optional - start at the block after the title on the first page
    # (originally this started on the 2nd page, which is wrong: the second page is the table of contents)
    page = 1
    block = next_block
    print_log("next block is: " + str(block))
    subtitle_node = None
    while (page < 2):
        try:
            trial_subtitle_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0]
            if Verbose_flag:
                print "trial_subtitle_node:"
                print trial_subtitle_node
            # the subtitle is assumed to be non-bold and either smaller than 20 points,
            # or italic and larger than 13 points
            subtitle_headers = trial_subtitle_node.xpath(
                ".//TOKEN[(@font-size < {0} and @bold = 'no' and @italic= 'no') or (@font-size > {1} and @bold = 'no' and @italic= 'yes')]"
                .format(20, 13))
            if Verbose_flag:
                print "subtitle_headers:"
                print subtitle_headers
            subtitle_path = '../../../../output/parse_result/' + directiory + '/subtitle.txt'
            title_path = '../../../../output/parse_result/' + directiory + '/title.txt'
            subtitle_head_txt = ' '.join([
                etree.tostring(el, method='text', encoding="UTF-8")
                for el in subtitle_headers
            ])
            if len(subtitle_head_txt) and not subtitle_head_txt.isdigit():
                if title_head_txt == "Project proposal":
                    subtitle_path = title_path
                    print "Subtitle: not found"
                    print "Title: found - the title block is 'Project proposal', so the subtitle is used as the title"
                txt = subtitle_head_txt
                st = 'subtitle'
                json_append(st, txt)
                # with open(subtitle_path, 'w') as f:
                #     print >> f, txt, "\n"  # print tag information to the subtitle file
                subtitle_node = trial_subtitle_node
                next_block = block + 1
                print "Subtitle: found"
                break
            block = block + 1
        except IndexError:
            page += 1

    # find author - on the cover page
    Found_Author = False
    Found_Level = False
    author_path = '../../../../output/parse_result/' + directiory + '/author_detail.txt'
    frontname_path = '../../../../output/parse_result/' + directiory + '/front_name.txt'
    aftername_path = '../../../../output/parse_result/' + directiory + '/after_name.txt'
    page = 1
    block = next_block
    auth_node = None
    auth_count = 0
    while (page < 2):
        try:
            trial_auth_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0]
            if Verbose_flag:
                print "trial_auth_node:"
                print trial_auth_node
            # the author's name(s) are assumed to be set smaller than the title
            # but larger than the "Degree Project ..." text (between 11 and 20 points)
            auth_headers = trial_auth_node.xpath(
                ".//TOKEN[@font-size < {0} and @font-size > {1}]".format(20, 11))
            if Verbose_flag:
                print "auth_headers:"
                print auth_headers
            print_log(document_type)
            auth_head_txt = ' '.join([
                etree.tostring(el, method='text', encoding="UTF-8")
                for el in auth_headers
            ])
            auth_list = auth_head_txt.split(";")
            while (len(auth_head_txt) > 0) and auth_count < 2 and len(auth_list) > auth_count:
                # found an author
                print "Author: found"
                auth_head_txt = auth_list[auth_count - 1]
                auth_count += 1
                name_split = auth_head_txt.split()
                txt = auth_head_txt
                author = author + "_" + auth_head_txt
                author_path = '../../../../output/parse_result/' + directiory + '/author_' + str(auth_count) + '.txt'
                st = 'author_' + str(auth_count)
                json_append(st, txt)
                # with open(author_path, 'w') as f:
                #     print >> f, txt, "\n"  # print tag information to the author file
                txt = name_split[0]
                frontname_path = '../../../../output/parse_result/' + directiory + '/author_' + str(auth_count) + '_frontname' + '.txt'
                st = 'author_' + str(auth_count) + '_frontname'
                json_append(st, txt)
                # with open(frontname_path, 'w') as f:
                #     print >> f, txt, "\n"  # print tag information to the front-name file
                txt = name_split[1]
                aftername_path = '../../../../output/parse_result/' + directiory + '/author_' + str(auth_count) + '_aftername' + '.txt'
                st = 'author_' + str(auth_count) + '_aftername'
                json_append(st, txt)
                # with open(aftername_path, 'w') as f:
                #     print >> f, txt, "\n"  # print tag information to the after-name file
                auth_node = trial_auth_node
            block = block + 1
        except IndexError:
            page += 1

    font_sizes = tree.xpath('//TOKEN/@font-size')
    mean_font_size = mean(font_sizes)
    median_font_size = median(font_sizes)
    # print "Median Font Size (i.e. body text):", median_font_size

    font_colors = tree.xpath('//TOKEN/@font-color')
    font_color_hash = {}
    for fc in font_colors:
        try:
            font_color_hash[fc] += 1
        except KeyError:
            font_color_hash[fc] = 1
    sortlist = [(v, k) for k, v in font_color_hash.iteritems()]
    sortlist.sort(reverse=True)
    main_font_color = sortlist[0][1]

    head_txts = []
    stop = False
    page = 0
    Found_abstract = False
    Found_org = False
    Found_key = False
    Found_Sammanfattning = False
    Found_Method = False
    Found_Introduction = False
    Found_TOC = False
    OrgandSup_path = '../../../../output/parse_result/' + directiory + '/Orignization_supervisor(en).txt'
    key_path = '../../../../output/parse_result/' + directiory + '/Keyword(en).txt'
    abstractOut_path = '../../../../output/parse_result/' + directiory + '/abstract(en).txt'
    abstractsvOut_path = '../../../../output/parse_result/' + directiory + '/abstract(sv).txt'
    referat_path = '../../../../output/parse_result/' + directiory + '/referat(sv).txt'
    methodOut_path = '../../../../output/parse_result/' + directiory + '/method(en).txt'
    toc_path = '../../../../output/parse_result/' + directiory + '/toc(en).txt'
    introductionOut_path = '../../../../output/parse_result/' + directiory + '/introduction(en).txt'
    heading_path = '../../../../output/parse_result/' + directiory + '/heading.txt'
    title_path = '../../../../output/parse_result/' + directiory + '/title.txt'

    # walk every page node
    for page_node in tree.xpath('//PAGE'):
        page = page + 1
        block_number = 0
        for block_node in page_node.xpath('.//BLOCK'):
            block_number = block_number + 1
            if xmltag:
                # decide which tag this block should carry
                # (originally an unchained if/if/elif/else, so every block fell through to "heading")
                if block_node == title_node:
                    # found the title
                    st = "title"
                    et = "title"
                elif block_node == subtitle_node:
                    # found the subtitle
                    st = "subtitle"
                    et = "subtitle"
                elif block_node == auth_node:
                    # found the author
                    st = "author"
                    et = "author"
                else:
                    # any other heading
                    st = "heading"
                    et = "heading"
                if highlight:
                    st = "\033[0;32m{0}\033[0m".format(st)
                    et = "\033[0;32m{0}\033[0m".format(et)
            else:
                st = et = ""

            if block_node == title_node and authonly:
                continue

            # Note: assuming the abstract heading is set in a larger font than the body text will not find
            # the abstracts of Aalto University theses - they set the word ABSTRACT only slightly larger
            # than the rest of the text, but they do set it in all caps.
            if look_for_all_caps_headings:
                headers = block_node.xpath(
                    ".//TOKEN[(@font-size > {0} and @bold = 'yes') or @font-color != '{1}']"
                    .format(mean_font_size, main_font_color))
            else:
                headers = block_node.xpath(
                    ".//TOKEN[(@font-size > {0} and @bold = 'yes') or @font-color != '{1}']"
                    .format(mean_font_size * 1.05, main_font_color))
            level_headers = block_node.xpath(".//TOKEN[@font-size > {0}]".format(0))
            head_txt = ' '.join([
                etree.tostring(el, method='text', encoding="UTF-8")
                for el in headers
            ])
            level_head_txt = ' '.join([
                etree.tostring(el, method='text', encoding="UTF-8")
                for el in level_headers
            ])
            # print head_txt
            if head_txt in text_start_to_exclude:
                start_to_exclude = True
            head_txt = filter_headings(head_txt)
            if len(head_txt) and (not start_to_exclude):
                # append start tag, tag content and end tag
                head_txts.append("{0}{1}{2}".format(st, head_txt, et))

            # model for project proposals
            if (int(document_type) == 1):
                print_log("first content check: " + head_txt)
                if head_txt.find("Authors") >= 0 or head_txt.find("Author") >= 0:
                    if not Found_Author:  # if the authors have not been found yet
                        print "Authors (en): OVERRIDE"
                        print "Authors and detail information (en): found"
                        author = ""
                        output_text_on_block_on_page(page_node, block_number, page, author_path)
                        author = auth
                        Found_Author = True
                if level_head_txt.find("Bachelor") >= 0 or level_head_txt.find("Master") >= 0 or level_head_txt.find("Degree Project") >= 0:
                    if not Found_Level:  # if the level has not been found yet
                        print_log("Level: found")
                        level_path = '../../../../output/parse_result/' + directiory + '/level.txt'
                        st = 'level'
                        json_append(st, level_head_txt)
                        # with open(level_path, 'w') as f:
                        #     print >> f, level_head_txt, "\n"  # print tag information to the level file
                        Found_Level = True
                if head_txt.find("Organization and Supervisor") >= 0 or (head_txt.find("Organization") >= 0 and head_txt.find("Supervisor") >= 0):
                    if not Found_org:  # if the organization has not been found yet
                        print "Organization and Supervisor (en): found"
                        output_blocks_on_page(page_node, block_number, page, OrgandSup_path, 0)
                        Found_org = True
                if head_txt.find("Keywords") >= 0 or head_txt.find("Keyword") >= 0:
                    print_log("reached keywords check")
                    if not Found_key:  # if the keywords have not been found yet
                        print "Keywords (en): found"
                        output_blocks_on_page(page_node, block_number, page, key_path, 0)
                        Found_key = True

            # model for theses
            if head_txt.find("Abstract") >= 0 or head_txt.find("ABSTRACT") >= 0:
                if not Found_abstract:  # if the abstract has not been found yet
                    print "Abstract (en): found"
                    output_blocks_on_page(page_node, block_number, page, abstractOut_path, 0)
                    Found_abstract = True
                    break
            if head_txt.find("Sammanfattning") >= 0 or head_txt.find("SAMMANFATTNING") >= 0:
                if not Found_Sammanfattning:
                    print "Sammanfattning (sv): found"
                    output_blocks_on_page(page_node, block_number, page, abstractsvOut_path, 0)
                    Found_Sammanfattning = True
                    break
            if head_txt.find("Abstrakt") >= 0 or head_txt.find("ABSTRAKT") >= 0:
                if not Found_Sammanfattning:
                    print "Abstrakt (sv): found"
                    output_blocks_on_page(page_node, block_number, page, abstractOut_path, 0)
                    Found_Sammanfattning = True
                    break
            if head_txt.find("Referat") >= 0 or head_txt.find("REFERAT") >= 0:
                if not Found_Sammanfattning:
                    print "Referat (sv): found"
                    output_blocks_on_page(page_node, block_number, page, referat_path, 0)
                    Found_Sammanfattning = True
                    break

            # table of contents
            if head_txt.find("Table of Contents") >= 0 or head_txt.find("Contents") >= 0:
                if not Found_TOC:  # if the table of contents has not been found yet
                    print "TOC (en): found"
                    output_blocks_on_page(page_node, block_number, page, toc_path, 0)
                    Found_TOC = True
                    break
            if head_txt.find("Introduction") >= 0 or head_txt.find("INTRODUCTION") >= 0:
                if not Found_Introduction:  # if the introduction has not been found yet
                    print "Introduction (en): found"
                    output_blocks_on_page(page_node, block_number, page, introductionOut_path, 1)
                    Found_Introduction = True
                    break
            if head_txt.find("Methods") >= 0 or head_txt.find("METHODS") >= 0 or head_txt.find("Methodology") >= 0 or head_txt.find("METHODOLOGY") >= 0:
                if not Found_Method:  # if the methods section has not been found yet
                    print "Methods (en): found"
                    output_blocks_on_page(page_node, block_number, page, methodOut_path, 0)
                    Found_Method = True
                    break

            # if head_txt.find("Abstracto(sp)") >= 0:
            #     print "Abstracto (sp):"
            #     output_blocks_on_page(page_node, block_number, page)
            #     break
            # if head_txt.find("Abstrait (fr)") >= 0:
            #     print "Abstrait (fr):"
            #     output_blocks_on_page(page_node, block_number, page)
            #     break

            if block_node == title_node and titleonly:
                stop = True
                break
            elif block_node == auth_node and authonly:
                stop = True
                break
        if stop:
            break
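# ---------------------------------------------------------------------------
# Hedged sketch (assumption): pdf2heads() records each extracted field through
# json_append(tag, text), which is defined elsewhere in the project. One
# plausible behaviour is appending the tag/text pair to a per-document JSON
# result file; the output directory used below is purely illustrative.
# ---------------------------------------------------------------------------
import json
import os

def json_append_sketch(tag, text, out_dir='../../../../output/parse_result/example'):
    # keep all extracted fields for one document in a single JSON file
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    path = os.path.join(out_dir, 'result.json')
    data = {}
    if os.path.exists(path):
        with open(path) as f:
            data = json.load(f)
    data[tag] = text
    with open(path, 'w') as f:
        json.dump(data, f, indent=2)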
def pdf2heads(opts, args):
    xmltag = True
    highlight = False
    titleonly = False
    authonly = False
    for o, a in opts:
        if (o == '--noxml'):
            xmltag = False
        elif (o == '--highlight'):
            highlight = True
        if (o == '--title'):
            titleonly = True
        elif (o == '--author'):
            authonly = True

    tree = pdf2etree(args)

    # find title
    page = 1
    block = 1
    title_node = None
    while True:
        try:
            title_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0]
        except IndexError:
            page += 1
        else:
            break
        if page > 2:
            # probably not going to find it now
            break

    # find author
    page = 1
    block = 2
    auth_node = None
    while True:
        try:
            auth_node = tree.xpath("//PAGE[{0}]//BLOCK[{1}]".format(page, block))[0]
        except IndexError:  # fixed typo: was 'InbdexError'
            block += 1
        else:
            break
        if block > 4:
            # probably not going to find it now
            break

    font_sizes = tree.xpath('//TOKEN/@font-size')
    mean_font_size = mean(font_sizes)
    median_font_size = median(font_sizes)
    # print "Median Font Size (i.e. body text):", median_font_size

    font_colors = tree.xpath('//TOKEN/@font-color')
    font_color_hash = {}
    for fc in font_colors:
        try:
            font_color_hash[fc] += 1
        except KeyError:
            font_color_hash[fc] = 1
    sortlist = [(v, k) for k, v in font_color_hash.iteritems()]
    sortlist.sort(reverse=True)
    main_font_color = sortlist[0][1]

    head_txts = []
    stop = False
    for page_node in tree.xpath('//PAGE'):
        for block_node in page_node.xpath('.//BLOCK'):
            if xmltag:
                if block_node == title_node:
                    st = "<title>"
                    et = "</title>"
                elif block_node == auth_node:
                    st = "<author>"
                    et = "</author>"
                else:
                    st = "<heading>"
                    et = "</heading>"
                if highlight:
                    st = "\033[0;32m{0}\033[0m".format(st)
                    et = "\033[0;32m{0}\033[0m".format(et)
            else:
                st = et = ""

            if block_node == title_node and authonly:
                continue

            headers = block_node.xpath(
                ".//TOKEN[@font-size > {0} or @bold = 'yes' or @font-color != '{1}']"
                .format(mean_font_size * 1.05, main_font_color))
            head_txt = ' '.join([
                etree.tostring(el, method='text', encoding="UTF-8")
                for el in headers
            ])
            if len(head_txt):
                head_txts.append("{0}{1}{2}".format(st, head_txt, et))

            if block_node == title_node and titleonly:
                stop = True
                break
            elif block_node == auth_node and authonly:
                stop = True
                break
        if stop:
            break

    for txt in head_txts:
        sys.stdout.writelines([txt, '\n'])
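# ---------------------------------------------------------------------------
# Hedged sketch (assumption): the mean/median helpers applied to the xpath
# results above. lxml returns @font-size attributes as strings, so these
# coerce to float first; the real helpers may live in a shared utils module.
# ---------------------------------------------------------------------------
def mean_sketch(values):
    nums = [float(v) for v in values]
    return sum(nums) / len(nums) if nums else 0.0

def median_sketch(values):
    nums = sorted(float(v) for v in values)
    if not nums:
        return 0.0
    mid = len(nums) // 2
    if len(nums) % 2:
        return nums[mid]
    return (nums[mid - 1] + nums[mid]) / 2.0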