def get_status_and_title(link, x):
    # title
    title = utils.get_title(x, selectors)
    if title.text.find("shared a memory") != -1:
        x = x.find_element_by_xpath(selectors.get("title_element"))
        title = utils.get_title(x, selectors)
    status = utils.get_status(x, selectors)
    if title.text == driver.find_element_by_id(selectors.get("title_text")).text:
        if status == "":
            temp = utils.get_div_links(x, "img", selectors)
            if temp == "":  # no image tag, which means it is not a life event
                link = utils.get_div_links(x, "a", selectors).get_attribute("href")
                post_type = "status update without text"
            else:
                post_type = "life event"
                link = utils.get_div_links(x, "a", selectors).get_attribute("href")
                status = utils.get_div_links(x, "a", selectors).text
        else:
            post_type = "status update"
            if utils.get_div_links(x, "a", selectors) != "":
                link = utils.get_div_links(x, "a", selectors).get_attribute("href")
    elif title.text.find(" shared ") != -1:
        x1, link = utils.get_title_links(title)
        post_type = "shared " + x1
    elif title.text.find(" at ") != -1 or title.text.find(" in ") != -1:
        if title.text.find(" at ") != -1:
            x1, link = utils.get_title_links(title)
            post_type = "check in"
        elif title.text.find(" in ") != -1:
            status = utils.get_div_links(x, "a", selectors).text
    elif title.text.find(" added ") != -1 and title.text.find("photo") != -1:
        post_type = "added photo"
        link = utils.get_div_links(x, "a", selectors).get_attribute("href")
    elif title.text.find(" added ") != -1 and title.text.find("video") != -1:
        post_type = "added video"
        link = utils.get_div_links(x, "a", selectors).get_attribute("href")
    else:
        post_type = "others"

    if not isinstance(title, str):
        title = title.text
    status = status.replace("\n", " ")
    title = title.replace("\n", " ")
    return link, status, title, post_type
def post_content(self, kw, cnt, bar):
    '''
    res:{'opgroup': '0', 'pid': '125867227452', 'tid': '6147431000', 'msg': '发送成功',
         'pre_msg': '经验 ',
         'info': {'access_state': [], 'confilter_hitwords': [], 'need_vcode': '0',
                  'vcode_md5': '7555x/KllzCmyK+jbZ9frCkGvrEKm/lvsIWXiJNGWK/4Z2lzOtCPczDKRsCjCJnP',
                  'vcode_prev_type': '0', 'vcode_type': '0', 'pass_token': ''},
         'time': 1559196367, 'ctime': 0, 'logid': 367165643, 'error_code': '0',
         'server_time': '569751'}
    '''
    contents = self.convert_to_contents(cnt, kw)
    fid = get_fid(kw)
    title = '{}{}'.format(kw, get_title())
    if check(self.bduss):
        res = client_thread_add(self.bduss, kw, fid, contents[0], title)
        print(res)
        time.sleep(30)
        # '发送成功' is the API's "sent successfully" message
        if 'msg' in res and res['msg'] != "发送成功":
            print('发帖失败{}'.format(kw))
            return
        print('{}发帖成功'.format(kw))
        tid = res['tid']
        for cont in contents[1:]:
            post = client_Post(self.bduss, kw, tid, fid, cont)
            if 'error_msg' in post:
                print('回帖失败')
                return
            print(post)
            time.sleep(15)
        print('{}回帖成功'.format(kw))
        bar.hassend = True
        self.session.commit()
        time.sleep(100)
def write_query_set_folder(db, prefix):
    '''
    Load queries from the prefix.txt, get citations for them and
    write each to a single file under folder prefix.
    '''
    # Create folder if it doesn't exist
    if not os.path.exists(prefix):
        os.mkdir(prefix)

    queries_file_path = prefix + ".txt"
    with open(queries_file_path, 'r') as file:
        for line in file:
            pub_id, year, title, _query_ = line.strip().split('\t')
            file_path = "%s/%s.txt" % (prefix, pub_id)
            citations = db.select("cited", table="graph", where="citing='%s'" % pub_id)

            # Write seed document id and then one citation per line
            with open(file_path, 'w') as citations_file:
                print >> citations_file, "%s\t%s\t%s" % (pub_id, year, title)
                for cited in citations:
                    title = utils.get_title(db, cited).strip()
                    print >> citations_file, "%s\t%s\t%s" % ("R1", cited, title.encode("UTF-8"))
def run(play_path, stats):
    gender_path, output_path, output_path_base, play_name = get_paths(play_path)
    # print(play_name)
    raw_play_lines, gender = get_files(play_path, gender_path)
    stats[play_name] = {'title': get_title(raw_play_lines)}
    play_stats = stats[play_name]
    output = process_play(raw_play_lines, gender, play_stats)
def generate_tweet_text(hilt, blade, pommel):
    hilt_details = MANIFEST["hilt"][hilt]
    blade_details = MANIFEST["blade"][blade]
    pommel_details = MANIFEST["pommel"][pommel]

    hilt_length = hilt_details["length"]
    pommel_length = pommel_details["length"]
    total_length = hilt_length + pommel_length

    average_length = AVERAGE_HILT_LENGTH + AVERAGE_POMMEL_LENGTH
    blade_length = int(AVERAGE_BLADE_LENGTH * (total_length / average_length))

    if DOUBLE_BLADE:
        total_length = hilt_length * 2
        blade_length *= 2

    title = get_title(blade_details)
    crystal = get_crystal(blade_details)
    name = f"{title} {random.choice(NAMES)}"

    tweet = f"""Owner: {name}
Hilt Length: {total_length} cm
Blade Length: {blade_length} cm
Blade Colour: {MANIFEST['blade'][blade]['colour']}
Kyber Crystal: {crystal}
#StarWars #lightsaber #{title}
"""
    return tweet
def generate():
    posts = utils.get_posts()
    ppp = config['posts_per_page']
    pages = int(math.ceil(float(len(posts)) / ppp))

    utils.clear_dir('site/page')
    for i in range(pages):
        page_content = render_template('frontend/index.html',
                                       config=config,
                                       frontend=True,
                                       current=i + 1,
                                       first=(i == 0),
                                       last=(i == pages - 1),
                                       posts=posts[i * ppp:(i + 1) * ppp])
        file('site/page/%s.html' % (i + 1), 'w').write(
            page_content.encode(config['encoding']))
        if i == 0:
            file('site/index.html', 'w').write(
                page_content.encode(config['encoding']))

    not_found_content = render_template('404.html', config=config, frontend=True)
    file('site/404.html', 'w').write(
        not_found_content.encode(config['encoding']))

    utils.clear_dir('site/posts')
    infos = utils.get_post_infos()
    feed = AtomFeed(config['title'],
                    feed_url=config['url_root'] + '/posts.atom',
                    url=config['url_root'])
    for info in infos:
        with open('posts/%s' % info['filename'], 'r') as f:
            content = f.read().decode(config['encoding'])
        title = utils.get_title(content)
        content = utils.postprocess_post_content(info['slug'], content, False)
        html_content = render_template('frontend/post.html',
                                       config=config,
                                       frontend=True,
                                       title=title,
                                       content=content)
        file('site/posts/%s.html' % info['slug'], 'w').write(
            html_content.encode(config['encoding']))
        feed_content = render_template('feed.html', config=config, content=content)
        feed.add(title, feed_content,
                 content_type='html',
                 url=make_external('/posts/' + info['slug']),
                 author='Tony Wang',
                 published=utils.date_localize_from_utc(info['time'], True),
                 updated=utils.date_localize_from_utc(info['time'], True))

    file('site/posts.atom', 'w').write(
        str(feed.get_response().iter_encoded(config['encoding']).next()))
    return 'Done!'
def edit(self):
    self.show()
    print("EDIT entry (Leave fields blank for no changes)")
    self.title = utils.get_title(self.title)
    self.date = utils.get_date(self.date)
    self.time = utils.get_time(self.time)
    self.notes = utils.get_notes(self.notes)
    self.save()
def add_task(cls):
    """Add new entry"""
    employee, _ = models.Employee.get_or_create(name=utils.get_name())
    task = models.Task.create(employee=employee,
                              title=utils.get_title(),
                              time=utils.get_time(),
                              notes=utils.get_notes())
    task.show()
    input("The entry has been added. Press enter to return to the menu")
def get(self, key, default=None):
    """
    Access attributes of the item. If the attribute is not found
    the default value (None) will be returned.
    """
    if key.startswith('tmp:'):
        return self._beacon_tmpdata.get(key[4:], default)
    if key == 'parent':
        return self._beacon_parent
    if key == 'media':
        return self._beacon_media
    if key == 'read_only':
        # FIXME: this is not correct, a directory can also be
        # read only on a rw filesystem.
        return self._beacon_media.get('volume.read_only', default)
    if key in ('image', 'thumbnail'):
        image = self._beacon_data.get('image')
        if not image:
            if self._beacon_parent and self._beacon_id:
                # This is not a good solution, maybe the parent is
                # not up to date. Well, we have to live with that
                # for now. Only get image from parent if the item
                # is scanned because it is a very bad idea that
                # unscanned images (we do not know that they are
                # images yet) inherit the image from a directory.
                image = self._beacon_parent.get('image')
            if not image:
                return default
        if image.startswith('http://'):
            fname = self._beacon_controller._db.md5url(image, 'images')
            if key == 'image':
                if not os.path.isfile(fname):
                    # FIXME: We need to fetch the image. Right now this will not
                    # happen until beacon restarts or a thumbnail is requested.
                    return default
                return fname
            if key == 'thumbnail':
                # the thumbnail code will take care of downloading
                return Thumbnail(image, self._beacon_media)
        if key == 'image':
            return image
        if key == 'thumbnail':
            return Thumbnail(image, self._beacon_media)
    if key == 'title':
        t = self._beacon_data.get('title')
        if t:
            return t
        # generate some title and save it locally for future use
        t = kaa.str_to_unicode(get_title(self._beacon_data['name'], self.isfile))
        self._beacon_data['title'] = t
        return t
    result = self._beacon_data.get(key, default)
    if result is None:
        return default
    return result
def index():
    infos = utils.get_post_infos()
    for info in infos:
        with open('posts/%s' % info['filename'], 'r') as f:
            content = f.read().decode(config['encoding'])
        title = utils.get_title(content)
        info['title'] = title
        info['date'] = utils.date_localize_from_utc(info['time'])
    return render_template('admin/index.html', config=config, infos=infos)
def edit(self):
    """
    Let the user edit a task by being asked to edit any of its
    attributes. Any field left blank is not changed.
    """
    self.show()
    print("EDIT entry (Leave fields blank for no changes)")
    self.title = utils.get_title(self.title)
    self.date = utils.get_date(self.date)
    self.time = utils.get_time(self.time)
    self.notes = utils.get_notes(self.notes)
def gen_filename(record):
    """ Guess the expected filename from the record.

    Args:
        record (dict): a record of the bibtex entry.

    Returns:
        A string which corresponds to the guessed filename (expected to be a pdf).
    """
    record_copy = record.copy()
    record_copy = bibtexparser.customization.author(record_copy)

    # Retrieve a stripped-down last name for each author
    last_names = []
    for author in record_copy['author']:
        stripped = utils.strip_accents(codecs.decode(author, "ulatex"))
        name = re.sub('([\\{\\}])', '', stripped.split(',')[0])
        name = re.sub('~', ' ', name)
        name = re.sub("\\\\'ı", "i", name)
        name = re.sub("\\\\`ı", "i", name)
        name = re.sub("ı", "i", name)
        name = re.sub('\xf8', 'o', name)
        name = re.sub('\\\\textquotesingle ', "'", name)
        name = name.replace('ł', 'l')
        last_names.append(name)

    # If there are more than 4 authors, use the 'et al.' form
    if len(last_names) > 4:
        prefix = '(' + last_names[0] + ' et al.) '
    else:
        prefix = '(' + ', '.join(last_names) + ') '

    title = utils.get_title(record_copy)
    title = title.replace('$\\Lambda_{\\infty}$ ', 'λ∞')
    title = re.sub('\\\\textendash ', '- ', title)
    title = utils.strip_accents(codecs.decode(title, "ulatex"))
    title = re.sub('([\\{\\}])', '', title)
    title = re.sub(' *: ', ' - ', title)
    title = re.sub(' *— *', ' - ', title)
    title = re.sub('–', '-', title)
    title = re.sub('/', '-', title)
    # title = re.sub('\\$\\mathplus \\$', '+', title)
    title = re.sub('\\\\textquotesingle ', "'", title)
    title = to_titlecase(title)
    title = re.sub('"', '', title)
    title = re.sub('’', "'", title)
    title = re.sub('\u2010', '-', title)
    title = re.sub('\u2122', '', title)
    title = title.replace('$\\texttt FreeFem++$', 'FreeFem++')
    title = title.replace('$\\lambda _\\Infty $ ', 'λ∞')
    return prefix + title + '.pdf'
def __init__(self, **kwargs):
    """Initialize an instance of Task with needed attributes"""
    if kwargs:
        self.title = kwargs.get('Title')
        self.date = datetime.datetime.strptime(kwargs.get('Date'), '%d/%m/%Y').date()
        self.time = kwargs.get('Time')
        self.notes = kwargs.get('Notes')
    else:
        self.title = utils.get_title()
        self.date = utils.get_date()
        self.time = utils.get_time()
        self.notes = utils.get_notes()
def gen_bibkey(record, all_keys):
    """ Generate a unique bibtex key for the given record.

    Args:
        record (dict): a record of the bibtex entry.
        all_keys (set): a set of existing bibtex keys in the current context.

    Returns:
        A string which corresponds to the newly generated unique bibtex key.
        The argument 'all_keys' is also appended with the new key.
    """
    for field in ['year', 'title', 'author']:
        if field not in record:
            record_str = json.dumps(record, sort_keys=True, indent=4,
                                    separators=(',', ': '))
            raise ValueError(
                "Missing field '{0}' in bibtex entry:\n{1}".format(field, record_str))

    record_copy = record.copy()
    record_copy = bibtexparser.customization.author(record_copy)

    # Retrieve a stripped down last name of the first author
    first_author = record_copy['author'][0]
    stripped = utils.strip_accents(codecs.decode(first_author, "ulatex"))
    last_name = stripped.split(',')[0]
    last_name = last_name.replace('ø', 'o')
    last_name = last_name.replace('ł', 'l')
    last_name = re.sub('([^a-zA-Z])', '', last_name)

    # Then get the first 3 initials of the article title
    curated_title = re.sub('([^a-zA-Z])', ' ', utils.get_title(record_copy))
    short_title = ''.join(s[0] for s in curated_title.split())
    short_title += curated_title.split()[-1][1:]
    short_title = short_title[:3].upper()

    # Key is Author:Year:Initials
    basekey = last_name + ":" + record_copy['year'] + ":" + short_title
    bibkey = basekey

    # Assign a unique key
    tail = 'a'
    while bibkey in all_keys:
        bibkey = basekey + tail
        tail = chr(ord(tail) + 1)

    all_keys.add(bibkey)
    return bibkey
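# A minimal usage sketch for gen_filename() and gen_bibkey() above, assuming the
# entries come from bibtexparser.load(); the file name 'library.bib' and the printed
# layout are illustrative assumptions, not part of the original code. The helpers
# also rely on the 'ulatex' codec, which requires the latexcodec package to be
# importable.
import bibtexparser

with open('library.bib') as bibfile:
    bib_db = bibtexparser.load(bibfile)

seen_keys = set()
for entry in bib_db.entries:
    key = gen_bibkey(entry, seen_keys)   # e.g. "Smith:2019:ABC"
    pdf_name = gen_filename(entry)       # e.g. "(Smith, Jones) Some Title.pdf"
    print(key, '->', pdf_name)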
def browse_papers(path_, csv_file, fout):
    fo = open(fout, 'w')

    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    print("Processing files ...")
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith(".json"):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                year, month, day = get_date_jsonfile(jfile, data)
                journal = get_journal_short_json(jfile, data)
                issue, volume = get_issue_volume(jfile, data)
                doi = get_doi(jfile, data)
                num_pages = get_number_of_pages(jfile, data)
                coauthors = get_coauthors_jsonfile(jfile, data)
                affiliations = get_all_affiliations(jfile, data)
                countries = get_all_countries(jfile, data)
                title = get_title(jfile, data)

                str_out = ""
                str_out += str(year) + " "
                str_out += str(month) + " "
                str_out += str(day) + " "
                str_out += str(journal) + " "
                str_out += str(issue) + " "
                str_out += str(volume) + " "
                str_out += str(doi) + " "
                str_out += str(len(coauthors)) + " "
                str_out += str(len(affiliations)) + " "
                str_out += str(len(countries)) + " "
                str_out += str(len(title)) + " "
                str_out += str(num_pages) + " "

                if doi in dict_1.keys():
                    str_out += str(len(dict_1[doi])) + " "
                else:
                    str_out += str(0) + " "

                if doi in dict_2.keys():
                    str_out += str(len(dict_2[doi])) + " "
                else:
                    str_out += str(0) + " "

                fo.write(str_out + "\n")

    fo.close()
def main():
    # TODO: add support for mysql
    for subgroup in d.available_subgroups:
        try:
            cursor.execute("SELECT * FROM %s;" % (subgroup['db_name']))
            anime_list = cursor.fetchall()
        except Exception as e:
            logging.error(e)

        for anime in anime_list:
            # Gets rss feed with params
            search_param = ' '.join(anime)
            i = 1
            while True:
                feed_url = ('%s?page=rss&term=%s&user=%s&offset=%d'
                            % (nyaa_url, search_param, subgroup['nyaa_id'], i)).replace(' ', '+')
                i = i + 1
                feed = feedparser.parse(feed_url)
                if not feed['entries']:
                    break
                # pprint(feed)
                for feed_entry in feed['entries']:
                    feed_title = feed_entry['title']
                    parsed_title = utils.get_title(feed_title, subgroup['regex']['title'])
                    if parsed_title == anime[0]:
                        parsed_episode = utils.get_episode(feed_title, subgroup['regex']['episode'])
                        if parsed_episode:
                            cursor.execute("SELECT 1 FROM downloaded WHERE title='%s' AND episode='%s' AND subgroup='%s';"
                                           % (anime[0].replace("'", "''"), parsed_episode, subgroup['subgroup']))
                            if not cursor.fetchone():
                                dl_location = cfg.dl_location + anime[0]
                                if not os.path.exists(dl_location):
                                    os.mkdir(dl_location)
                                download_queue.append({'torrent': download_torrent(anime[0], feed_entry['link']),
                                                       'info': {'title': anime[0],
                                                                'episode': parsed_episode,
                                                                'quality': anime[1],
                                                                'subgroup': subgroup['subgroup']}})
                                cursor.execute("INSERT INTO downloaded VALUES('%s', '%s', '%s', '%s', '%s');"
                                               % (anime[0].replace("'", "''"), parsed_episode,
                                                  datetime.datetime.now().isoformat(),
                                                  subgroup['subgroup'], 'Downloading'))
                                connection.commit()

    # spawn child thread here to monitor downloads.
    while len(download_queue):
        for torrent in download_queue:
            torrent_obj = tc.get_torrent(torrent['torrent'])
            if torrent_obj.status == 'seeding':
                logging.info('%s completed.' % (torrent_obj.name))
                cursor.execute("UPDATE downloaded SET status='Completed' WHERE title='%s' AND episode='%s' AND subgroup='%s';"
                               % (torrent['info']['title'].replace("'", "''"),
                                  torrent['info']['episode'], torrent['info']['subgroup']))
                connection.commit()
                download_queue.remove(torrent)
        sleep(1)

    connection.commit()
    connection.close()
def __init__(self, link, user, title=None, points=0, domain="", submitted=None):
    self.link = link
    self.user = user
    self.points = points
    # evaluate the default at call time, not at definition time
    self.submitted = submitted if submitted is not None else datetime.now()
    if title:
        self.title = title
    else:
        self.title = get_title(link)
    parsed_uri = urlparse(link)
    self.domain = '{uri.netloc}'.format(uri=parsed_uri)
def summarize(page_id, results_str, features):
    print 'results for %s (id=%d)' % (
        utils.get_title(page_id).encode('ascii', 'ignore'), page_id)
    if len(results_str) < 2:
        print '\tno results'
        return

    i = 1
    results = []
    ranks_by_score = collections.defaultdict(list)
    for pair in results_str[1:-1].split('|'):
        tokens = pair.split(',')
        page_id2 = int(tokens[0])
        score = float(tokens[1])
        used_features = tokens[2] if len(tokens) == 3 else ''
        results.append([page_id2, score, used_features])
        ranks_by_score[score].append(i)
        i += 1

    ranks_to_show = set()
    r = 1
    while r <= len(results):
        for i in range(r, r + 3):
            ranks_to_show.add(i)
        r *= 2
    ranks_to_show = [r for r in sorted(ranks_to_show) if r <= len(results)]

    for rank in ranks_to_show:
        (page_id2, score, used_features) = results[rank - 1]
        tie = ''
        if len(ranks_by_score[score]) > 1:
            tie = ', %d-way tie' % len(ranks_by_score[score])
        feature_info = ''
        if features and used_features:
            feature_info = ', %s:%s' % (used_features, features)
        print (u'\t%.5d: %s (id=%d, score=%.3f%s%s)'
               % (rank, utils.get_title(page_id2), page_id2, score,
                  tie, feature_info)).encode('utf-8')
def get_all_websphere(page=None):
    """
    Get the WebSphere info list, paginated.
    :param page: current page number
    :return: the paginated result list plus pagination info
    """
    app.logger.debug("run into get_all_websphere function")
    page = request.args.get('page', 1, type=int)
    paginate = db.session.query(WebSphere, System).join(System).order_by(
        System.inventory).paginate(page, NUM_PER_PAGE)
    was_list_in = paginate.items
    return render_template("all_websphere.html",
                           title=get_title("WebSphere信息列表"),
                           pagination=paginate,
                           was_list=was_list_in)
def get_all_db2(page=None):
    """
    Get the DB2 info list, paginated.
    :param page: current page number
    :return: the paginated result list plus pagination info
    """
    app.logger.debug("run into get_all_db2 function")
    page = request.args.get('page', 1, type=int)
    paginate = db.session.query(DB2, System).join(System).paginate(
        page, NUM_PER_PAGE)
    db2_list_in = paginate.items
    return render_template("all_db2.html",
                           title=get_title("DB2信息列表"),
                           pagination=paginate,
                           db2_list=db2_list_in)
def lambda_handler(event, context):
    options = FirefoxOptions()
    options.headless = True
    options.binary_location = '/usr/local/bin/firefox'
    driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver',
                               log_path='/tmp/geckodriver.log',
                               firefox_options=options)
    title = get_title(driver)
    print(title)
    driver.quit()
    return {'statusCode': 200, 'body': json.dumps("LGTM")}
def edit(time_slug):
    filename = '%s.md' % time_slug
    file_path = 'posts/%s' % filename
    if not os.path.exists(file_path):
        # TODO: 404 page
        return '', 404

    if request.method == 'GET':
        info = utils.parse_filename(filename)
        with open(file_path) as f:
            content = f.read().decode(config['encoding'])
        info['title'] = utils.get_title(content)
        info['content'] = '\n'.join(content.splitlines()[4:])
        info['date'] = utils.date_localize_from_utc(info['time'])
        return render_template('admin/edit.html', config=config, info=info)
    elif request.method == 'POST':
        result = utils.parse_filename(filename)
        if not result:
            return '', 404
        title = request.form['title'].strip()
        date = request.form['date'].strip()
        content = request.form['content'].strip()
        slug = request.form['slug'].strip()
        try:
            post_time = utils.datetime2epoch(date)
        except ValueError:
            # TODO: flash message
            return '', 404
        time_str = utils.date_localize_from_utc(post_time)
        file_to_remove = None
        if post_time != result['time'] or slug != result['slug']:
            file_to_remove = file_path
            file_path = 'posts/%s-%s.md' % (post_time, slug)
        file_content = content_template % {'title': title,
                                           'time': time_str,
                                           'content': content}
        with open(file_path, 'w') as f:
            f.write(file_content.encode(config['encoding']))
        if file_to_remove:
            os.remove(file_to_remove)
        return redirect('/')
def get_filter_system(inventory_filter=None, os_filter=None):
    """
    Get the filtered system info; results can be filtered by inventory (IP) and OS.
    :param inventory_filter: IP filter
    :param os_filter: operating system type filter
    :return: all_system.html
    """
    app.logger.debug("filter")
    sys_was_count_list = []
    sys_db2_count_list = []
    if request.method == 'POST':
        inventory_filter = request.form['inventory_filter']
        os_filter = request.form['os_filter']
        app.logger.debug("POST")
    elif request.method == 'GET':
        inventory_filter = request.args.get('inventory_filter')
        os_filter = request.args.get('os_filter')
        app.logger.debug("GET")
    app.logger.debug("inventory_filter: {0}".format(inventory_filter))
    app.logger.debug("os_filter: {0}".format(os_filter))

    page = request.args.get('page', 1, type=int)
    # Paginate the results
    if os_filter == "all":
        paginate = System.query.filter(
            System.inventory.like("%{0}%".format(inventory_filter))).paginate(
                page, NUM_PER_PAGE)
    else:
        paginate = System.query.filter(
            System.inventory.like("%{0}%".format(inventory_filter))).filter(
                System.os_info == str(os_filter)).paginate(page, NUM_PER_PAGE)
    systems = paginate.items

    for one_system in systems:
        sys_was_count = WebSphere.query.filter_by(
            sys_inventory=one_system.inventory).count()
        sys_db2_count = DB2.query.filter_by(
            sys_inventory=one_system.inventory).count()
        sys_was_count_list.append(sys_was_count)
        sys_db2_count_list.append(sys_db2_count)
    db.session.close()

    return render_template("all_system.html",
                           inventory_filter_val=inventory_filter,
                           title=get_title("主机信息列表"),
                           system_list=systems,
                           pagination=paginate,
                           os_filter_val=os_filter,
                           os_list_val=get_os_list(),
                           sys_was_count_list=sys_was_count_list,
                           sys_db2_count_list=sys_db2_count_list)
def __init__(self, link):
    self.link = link
    self.title = ""
    self.time = 0
    self.content = ""
    self.keywords = ""
    self.refer = []
    self.status = False  # whether parsing succeeded

    # Check whether the link has already been parsed
    # Check whether the link is on the unparseable list
    if link.find('http://') == -1:
        return  # invalid link
    base_url = 'http://' + link.split('/')[2]
    # unparse_check = store.find(UnparsePage_m, UnparsePage_m.url == base_url.decode('utf-8'))
    # if unparse_check.count() != 0:
    #     print "can not parse this link"
    #     return

    self.pq = ""
    try:
        self.pq = pq(url=link).make_links_absolute()  # parsing may fail
    except Exception as err:
        print "failed to open this link " + link
    if self.pq == "":
        return

    # get title
    self.title = get_title(self.pq)
    self.time = time.time()
    self.content = get_content(self.pq)
    self.refer = get_refer(self.pq)
    if len(self.title) == 0 or \
            len(self.content) == 0 or len(self.refer) == 0:
        # parsing failed
        print "can not parse " + link
        # add the site to the unparseable-site table
        mpage = UnparsePage_m()
        mpage.url = base_url.decode('utf-8')
        mpage.save()
        self.keywords = ''
        return
    else:
        # get keywords
        self.keywords = jieba.cut_for_search(self.title)
        self.status = True
def _poll(self):
    url = self.preferences.jenkinsURL
    client = self.preferences.newJenkinsClient()

    # Jobs:
    _jobs = client.get_jobs()
    if self.preferences.extendedInfo:
        for jobDict in _jobs:
            extendedInfo = client.get_job_info(jobDict["name"])
            jobDict.update(extendedInfo)

    # favicon:
    _tempFileName = self._getFavIcon(url)

    # title:
    try:
        title = get_title(url)
    except Exception, e:
        title = self.UNABLE_TO_CONNECT_TITLE
        print e
def _beacon_update(self, prop):
    """
    Update media properties.
    """
    self.prop = prop
    self.device = str(prop.get('block.device', ''))
    self.mountpoint = str(prop.get('volume.mount_point', ''))
    log.info('new media %s (%s) at %s', self.id, self.device, self.mountpoint)
    if not self.mountpoint:
        self.mountpoint = self.device
    if not self.mountpoint.endswith('/'):
        self.mountpoint += '/'
    # get basic information from database
    media = self._beacon_controller._beacon_media_information(self)
    if isinstance(media, kaa.InProgress):
        # This will happen for the client because in the client
        # _beacon_media_information needs to lock the db.
        media = yield media
    self.beaconid = media['id']
    prop['beacon.content'] = media['content']
    self._beacon_isdir = False
    if media['content'] == 'file':
        self._beacon_isdir = True
    self.thumbnails = os.path.join(self.overlay, '.thumbnails')
    if self.mountpoint == '/':
        self.thumbnails = os.path.join(os.environ['HOME'], '.thumbnails')
    if self.root.get('title'):
        self.label = self.root.get('title')
    elif prop.get('volume.label'):
        self.label = utils.get_title(prop.get('volume.label'))
    elif prop.get('info.parent'):
        self.label = u''
        parent = prop.get('info.parent')
        if parent.get('storage.vendor'):
            self.label += parent.get('storage.vendor') + u' '
        if parent.get('info.product'):
            self.label += parent.get('info.product')
        self.label = self.label.strip()
        if self.device:
            self.label += ' (%s)' % self.device
        if not self.label:
            self.label = self.id
    else:
        self.label = self.id
def _beacon_update(self, prop):
    """
    Update media properties.
    """
    self.prop = prop
    self.device = str(prop.get('block.device', ''))
    self.mountpoint = str(prop.get('volume.mount_point', ''))
    log.info('new media %s (%s) at %s', self.id, self.device, self.mountpoint)
    if not self.mountpoint:
        self.mountpoint = self.device
    if not self.mountpoint.endswith('/'):
        self.mountpoint += '/'
    # get basic information from database
    media = self._beacon_controller._beacon_media_information(self)
    if isinstance(media, kaa.InProgress):
        # This will happen for the client because in the client
        # _beacon_media_information needs to lock the db.
        media = yield media
    self.beaconid = media['id']
    prop['beacon.content'] = media['content']
    self._beacon_isdir = False
    if media['content'] == 'file':
        self._beacon_isdir = True
    # TODO: choose self.thumbnails for media not /
    self.thumbnails = os.path.join(os.environ['HOME'], '.thumbnails')
    if self.root.get('title'):
        self.label = self.root.get('title')
    elif prop.get('volume.label'):
        self.label = utils.get_title(prop.get('volume.label'))
    elif prop.get('info.parent'):
        self.label = u''
        parent = prop.get('info.parent')
        if parent.get('storage.vendor'):
            self.label += parent.get('storage.vendor') + u' '
        if parent.get('info.product'):
            self.label += parent.get('info.product')
        self.label = self.label.strip()
        if self.device:
            self.label += ' (%s)' % self.device
        if not self.label:
            self.label = self.id
    else:
        self.label = self.id
def get_group_post_as_line(post_id, photos_dir):
    try:
        data = driver.find_element_by_xpath(selectors.get("single_post"))
        time = utils.get_time(data)
        title = utils.get_title(data, selectors).text
        # link, status, title, type = get_status_and_title(title, data)
        link = utils.get_div_links(data, "a", selectors)
        if link != "":
            link = link.get_attribute("href")
        post_type = ""
        status = '"' + utils.get_status(data, selectors).replace("\r\n", " ") + '"'
        photos = utils.get_post_photos_links(data, selectors, photos_small_size)
        comments = get_comments()
        photos = image_downloader(photos, photos_dir)
        line = (str(time) + "||" + str(post_type) + "||" + str(title) + "||"
                + str(status) + "||" + str(link) + "||" + str(post_id) + "||"
                + str(photos) + "||" + str(comments) + "\n")
        return line
    except Exception:
        return ""
def detail(inventory=None):
    """
    The traditional way of getting system information.
    Look up the system by its inventory (IP), fetch its WebSphere and DB2 info,
    and render details.html.
    :param inventory: system IP
    :return: details.html: the detail page, including system / WebSphere / DB2 info
    """
    try:
        system_detail = System.query.filter_by(
            inventory=inventory).first_or_404()
        if PRODUCT:
            # Remove the current WebSphere/DB2 info from the database
            # app.logger.debug("remove current WebSphere/DB2 info")
            # for one_was in was_detail:
            #     db.session.delete(one_was)
            # for one_db2 in db2_detail:
            #     db.session.delete(one_db2)

            # Call the ansible function to retrieve WebSphere, DB2 and system info
            # for the target inventory (currently only WebSphere info is implemented)
            details_host_ok = details_ansible_run(inventory_in=inventory)
            app.logger.debug(system_detail)
            detail_update(system_detail, details_host_ok)
            db.session.commit()
            # app.logger.debug(details_host_ok)
        new_was_detail = WebSphere.query.filter_by(
            sys_inventory=inventory).all()
        new_db2_detail = DB2.query.filter_by(sys_inventory=inventory).all()
        return render_template("details.html",
                               title=get_title("具体信息"),
                               system_detail_in=system_detail,
                               was_detail_in=new_was_detail,
                               db2_detail_in=new_db2_detail)
    except Exception as e:
        app.logger.debug(e)
        # The update failed; roll back immediately
        db.session.rollback()
        return render_template("500.html")
def browse_papers(path_, csv_file, xmin=60):
    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    print("Processing files ...")
    papers_list = {}
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith(".json"):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                year, month, day = get_date_jsonfile(jfile, data)
                journal = get_journal_short_json(jfile, data)
                issue, volume = get_issue_volume(jfile, data)
                coauthors = get_coauthors_jsonfile(jfile, data)
                title = get_title(jfile, data)
                doi_ = get_doi(jfile, data)

                if doi_ in dict_1.keys():
                    cits_ = len(dict_1[doi_])
                else:
                    cits_ = 0

                if doi_ in dict_2.keys():
                    refs_ = len(dict_2[doi_])
                else:
                    refs_ = 0

                if cits_ >= xmin:
                    papers_list[doi_] = [title.encode('utf-8'), str(journal),
                                         str(year), str(volume), str(issue),
                                         str(cits_), str(refs_)]

    print("Database processed ...")
    return papers_list
def get_all_system():
    sys_was_count_list = []
    sys_db2_count_list = []
    page = request.args.get('page', 1, type=int)
    # Paginate the results
    paginate = System.query.paginate(page, NUM_PER_PAGE)
    systems = paginate.items
    for one_system in systems:
        sys_was_count = WebSphere.query.filter_by(
            sys_inventory=one_system.inventory).count()
        sys_db2_count = DB2.query.filter_by(
            sys_inventory=one_system.inventory).count()
        sys_was_count_list.append(sys_was_count)
        sys_db2_count_list.append(sys_db2_count)
    db.session.close()
    return render_template("all_system.html",
                           inventory_filter_val="",
                           title=get_title("主机信息列表"),
                           system_list=systems,
                           pagination=paginate,
                           os_filter_val="",
                           os_list_val=get_os_list(),
                           sys_was_count_list=sys_was_count_list,
                           sys_db2_count_list=sys_db2_count_list)
def fetcher():
    for subgroup in d.available_subgroups:
        try:
            cursor.execute("SELECT * FROM %s;" % (subgroup['db_name']))
            search_list = cursor.fetchall()
        except Exception as e:
            logging.error(e)

        for item in search_list:
            search_param = ' '.join(item)
            i = 1
            while True:
                feed_url = ('%s?page=rss&term=%s&user=%s&offset=%d'
                            % (nyaa_url, search_param, subgroup['nyaa_id'], i)).replace(' ', '+')
                i = i + 1
                feed = feedparser.parse(feed_url)
                if not feed['entries']:
                    break
                for feed_entry in feed['entries']:
                    feed_title = feed_entry['title']
                    parsed_title = utils.get_title(feed_title, subgroup['regex']['title'])
                    if parsed_title == item[0]:
                        parsed_episode = utils.get_episode(feed_title, subgroup['regex']['episode'])
                        if parsed_episode:
                            cursor.execute("SELECT 1 FROM downloaded WHERE title='%s' AND episode='%s' AND subgroup = '%s';"
                                           % (item[0].replace("'", "''"), parsed_episode, subgroup['subgroup']))
                            if not cursor.fetchone():
                                dl_location = cfg.dl_location + item[0]
                                if not os.path.exists(dl_location):
                                    try:
                                        os.mkdir(dl_location)
                                    except Exception as e:
                                        logging.error(e)
                                payload.append({'title': item[0],
                                                'link': feed_entry['link'],
                                                'episode': parsed_episode,
                                                'subgroup': subgroup['subgroup'],
                                                'quality': item[1]})

    # Send payload to WebAPI
    r = requests.post('http://%s:%s/api/addtorrent' % (API_URL, API_PORT),
                      headers={'content-type': 'application/json'},
                      data=json.dumps(payload))
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions

from utils import get_title

if __name__ == '__main__':
    options = FirefoxOptions()
    options.headless = True
    options.binary_location = '/usr/local/bin/firefox'
    driver = webdriver.Firefox(executable_path='/usr/local/bin/geckodriver',
                               log_path='/tmp/geckodriver.log',
                               firefox_options=options)
    title = get_title(driver)
    print(title)
def test_get_title_exception_first(self, fake_input, fake_print):
    fake_input.side_effect = ['', 'Test title']
    result = utils.get_title()
    self.assertEqual(result, 'Test title')
    self.assertEqual(fake_print.call_count, 1)
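# A minimal sketch (an assumption, not the project's actual utils.get_title) of the
# kind of prompt helper the test above exercises: an empty answer triggers exactly one
# printed warning before re-prompting, while an existing value passed in (as the
# edit() methods above do) is kept when the answer is left blank.
def get_title(current=None):
    while True:
        title = input("Title: ").strip()
        if title:
            return title
        if current is not None:
            # editing an existing entry: blank keeps the old title
            return current
        print("Title cannot be blank, please try again")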
# query = "subspace+clustering_N100_H1"
query = "subgraph+mining"
# query = "data+cleaning_N100_H1"
# query = "image+descriptor_N100_H1"

graph = nx.read_gexf("models/%s.gexf" % query, node_type=int)

# print "The Dense", len(graph.in_edges(637)), \
#     sum([a["weight"] for u, v, a in graph.in_edges(637, data=True)]), \
#     np.mean([graph.out_degree(u) for u, v in graph.in_edges(637)])
#
# print "GSpan", len(graph.in_edges(296)), \
#     sum([a["weight"] for u, v, a in graph.in_edges(296, data=True)]), \
#     np.mean([graph.out_degree(u) for u, v in graph.in_edges(296)])
# sys.exit()

rank = rank_nodes(graph, 1.0, 1.0, 1.0, 1.0,
                  ctx_relev=0.5, query_relev=0.5, age_relev=0.5,
                  limit=15, out_file="graphs/ranks/%s.gexf" % query)

print
for node_id, paper_id, query_score, score, score_layers in rank:
    print "{%15s, %4d, %3d, %.4f} : [%.2f] %-70s | %s" % (
        paper_id, graph.node[node_id]["year"], len(graph.in_edges(node_id)),
        100 * query_score, 100 * score, utils.get_title(paper_id)[:70],
        ' '.join(map(str, np.round(100 * score_layers, 3))))
def get_summary_text():
    if request.method == 'GET':
        url = request.args.get('url')
        return jsonify(string=get_summary(url), title=get_title(url))
    return "Not opening!"
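# Hedged client-side example for the route above; the host, port and URL path
# ('/get_summary_text') are assumptions for illustration only.
import requests

resp = requests.get('http://localhost:5000/get_summary_text',
                    params={'url': 'https://example.com/article'})
data = resp.json()
print(data['title'])
print(data['string'])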
def extract_and_write_posts(elements, filename):
    try:
        f = open(filename, "w", newline="\r\n")
        f.writelines(
            " TIME || TYPE || TITLE || STATUS || LINKS(Shared Posts/Shared Links etc) "
            + "\n" + "\n")

        for x in elements:
            try:
                title = " "
                status = " "
                link = ""
                time = " "

                # time
                time = utils.get_time(x)

                # title
                title = utils.get_title(x, selectors)
                if title.text.find("shared a memory") != -1:
                    x = x.find_element_by_xpath(selectors.get("title_element"))
                    title = utils.get_title(x, selectors)

                status = utils.get_status(x, selectors)
                if (title.text == driver.find_element_by_id(
                        selectors.get("title_text")).text):
                    if status == "":
                        temp = utils.get_div_links(x, "img", selectors)
                        if temp == "":  # no image tag, which means it is not a life event
                            link = utils.get_div_links(
                                x, "a", selectors).get_attribute("href")
                            type = "status update without text"
                        else:
                            type = "life event"
                            link = utils.get_div_links(
                                x, "a", selectors).get_attribute("href")
                            status = utils.get_div_links(x, "a", selectors).text
                    else:
                        type = "status update"
                        if utils.get_div_links(x, "a", selectors) != "":
                            link = utils.get_div_links(
                                x, "a", selectors).get_attribute("href")
                elif title.text.find(" shared ") != -1:
                    x1, link = utils.get_title_links(title)
                    type = "shared " + x1
                elif title.text.find(" at ") != -1 or title.text.find(" in ") != -1:
                    if title.text.find(" at ") != -1:
                        x1, link = utils.get_title_links(title)
                        type = "check in"
                    elif title.text.find(" in ") != -1:
                        status = utils.get_div_links(x, "a", selectors).text
                elif (title.text.find(" added ") != -1
                      and title.text.find("photo") != -1):
                    type = "added photo"
                    link = utils.get_div_links(x, "a", selectors).get_attribute("href")
                elif (title.text.find(" added ") != -1
                      and title.text.find("video") != -1):
                    type = "added video"
                    link = utils.get_div_links(x, "a", selectors).get_attribute("href")
                else:
                    type = "others"

                if not isinstance(title, str):
                    title = title.text

                status = status.replace("\n", " ")
                title = title.replace("\n", " ")

                line = (str(time) + " || " + str(type) + " || " + str(title)
                        + " || " + str(status) + " || " + str(link) + "\n")

                try:
                    f.writelines(line)
                except Exception:
                    print("Posts: Could not map encoded characters")
            except Exception:
                pass
        f.close()
    except Exception:
        print("Exception (extract_and_write_posts)", "Status =", sys.exc_info()[0])
    return
def browse_papers(path_, csv_file):
    print("Processing citations ...")
    dict_1, dict_2 = parse_csv_file(csv_file)

    # client = MongoClient('localhost', 27017)
    client = MongoClient()
    db = client['apsdb']            # Get a database
    aps = db['aps-articles-basic']  # Get a collection

    print("Removing all record ...")
    aps.delete_many({})  # Clean the collection

    print("Processing files ...")
    tmp_list = []
    for root, dirs, files in os.walk(path_):
        for name in files:
            if name.endswith(".json"):
                jfile = root + "/" + name
                data = json.load(open(jfile))

                year, month, day = get_date_jsonfile(jfile, data)
                journal = get_journal_short_json(jfile, data)
                issue, volume = get_issue_volume(jfile, data)
                doi = get_doi(jfile, data)
                num_pages = get_number_of_pages(jfile, data)
                coauthors = get_coauthors_jsonfile(jfile, data)
                affiliations = get_all_affiliations(jfile, data)
                countries = get_all_countries(jfile, data)
                title = get_title(jfile, data)

                aps_paper = {'year': year, 'month': month, 'day': day}
                aps_paper['journal'] = journal
                aps_paper['issue'] = issue
                aps_paper['volume'] = volume
                aps_paper['doi'] = doi
                aps_paper['num_authors'] = len(coauthors)
                aps_paper['num_affs'] = len(affiliations)
                aps_paper['num_countries'] = len(countries)
                aps_paper['title'] = title
                aps_paper['title_length'] = len(title)
                aps_paper['num_pages'] = num_pages

                if doi in dict_1.keys():
                    aps_paper['citations'] = len(dict_1[doi])
                else:
                    aps_paper['citations'] = 0

                if doi in dict_2.keys():
                    aps_paper['num_references'] = len(dict_2[doi])
                else:
                    aps_paper['num_references'] = 0

                tmp_list.append(aps_paper)
                if len(tmp_list) > BIG_LIST_SIZE:
                    aps.insert_many(tmp_list)
                    tmp_list = []

    if len(tmp_list) > 0:
        aps.insert_many(tmp_list)
        tmp_list = []

    return aps