def openurl(self):
    urls = get_urls(self.current_status().text)
    for url in urls:
        try:
            os.system(self.conf.params["openurl_command"] % url)
        except:
            pass

def openurl(self):
    urls = get_urls(self.current_status().text)
    for url in urls:
        try:
            # Silence the command's output by redirecting it to /dev/null.
            os.system(self.conf.params['openurl_command'] % url + '> /dev/null 2>&1')
        except:
            pass

def openurl(self):
    urls = get_urls(self.current_status().text)
    for url in urls:
        try:
            os.system(self.conf.params['openurl_command'] % url + '> /dev/null 2>&1')
        except:
            logging.error('openurl error')

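# The openurl variants above (and several snippets below) assume a
# get_urls(text) helper that extracts URLs from a piece of status/message
# text. The real helper used by these projects is not shown here; the
# following is only a minimal regex-based sketch of such a function.
import re

_URL_RE = re.compile(r'https?://[^\s<>"\']+')

def get_urls(text):
    """Return all http(s) URLs found in `text`, in order of appearance."""
    return _URL_RE.findall(text or '')
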
def add_message_to_view(self, channel, user, message, force=False):
    if user != self.nick or force:
        if user == self.last_nick[channel]:
            user = "******" * (len(user) + 2)
        else:
            self.last_nick[channel] = user
            user += ": "
        self.add_text_with_tag(channel, user, "nick")

        end = self.buffers[channel].get_end_iter()
        offset = end.get_offset()

        # Alternate between two tags so consecutive messages can be styled differently.
        tag = "message1" if self._last_tag == "message2" else "message2"
        self._last_tag = tag
        self.add_text_with_tag(channel, message + "\n", tag)

        # Highlight mentions of our own nick in messages from other users.
        end = self.buffers[channel].get_iter_at_offset(offset)
        if self.last_nick[channel] != self.nick:
            self.search_and_mark(channel, self.nick, end, "mention")

        # Tag every URL in the message so it can be rendered as a link.
        offset = end.get_offset()
        for url in get_urls(message):
            end = self.buffers[channel].get_iter_at_offset(offset)
            self.search_and_mark(channel, url, end, "url")

def shorter_url(self):
    self._set_service()
    long_urls = get_urls(self.content)
    for long_url in long_urls:
        short_url = self.shorter.do_shorter(long_url)
        try:
            # Replace the long URL in the content with its shortened form;
            # skip it if mixing byte and unicode strings raises a decode error.
            self.content = self.content.replace(long_url, short_url)
        except UnicodeDecodeError:
            pass

def get_film_list():
    """
    Get film information.
    :return: list of film dicts
    """
    from utils import get_urls

    func_name = inspect.stack()[0][3]
    hlog.enter_func(func_name)

    film_list = list()
    film_objs = session.query(Film).all()

    for obj in film_objs:
        film_id = obj.id
        hlog.var('film_id', film_id)

        # Collect the URLs of every resource related to this film.
        location_list = get_urls('Location', film_id)
        people_list = get_urls('People', film_id)
        specie_list = get_urls('Specie', film_id)
        vehicle_list = get_urls('Vehicle', film_id)

        film = {
            "id": obj.id,
            "title": obj.title,
            "description": obj.description,
            "director": obj.director,
            "producer": obj.producer,
            "release_date": obj.release_date,
            "rt_score": obj.rt_score,
            "url": obj.url,
            "people": people_list,
            "species": specie_list,
            "locations": location_list,
            "vehicles": vehicle_list
        }

        film_list.append(film)

    hlog.info("Film information read successfully.")
    hlog.exit_func(func_name)

    return film_list

def on_command(self, event, response):
    text = event['text']
    if text:
        urls = get_urls(text)
        for url in urls:
            resp = dict(response)
            resp.update(attachments=json.dumps([IsUp.is_up(url)]))
            self.bot.sc.api_call('chat.postMessage', **resp)
    else:
        raise PluginException('No website to check! e.g. `!isup google.com`')

def start_requests(self):
    urls = utils.get_urls(filename=self.filename)

    # debug purposes: dump the loaded URLs to a file
    # (generated_urls is a path defined elsewhere in the module)
    with open(generated_urls, 'w') as f:
        for url in urls:
            f.write(f'{url}\n')

    for url in urls:
        yield scrapy.Request(url=url, callback=self.parse_url)

def __init__(self, filename='partner_urls.txt', outfile=None, **kwargs):
    # Default to loading partner_file if a .json extension is found.
    ext = str(filename).split('.')[-1]
    self.outfile = outfile

    if ext != 'json':
        # Load URLs from a plain-text file.
        self.data = {}
        self.urls = utils.get_urls(filename)
    else:
        # Load the generated output from main.py
        # (partner_file is a path defined elsewhere in the module).
        with open(partner_file, 'r') as f:
            self.data = json.load(f)
        self.urls = [k for k in self.data.keys()]

    super().__init__(**kwargs)

def search(question):
    # Send the question to get_urls() in utils.py, which googles the query and
    # generates a list of suitable URLs.
    urls = utils.get_urls(question)

    names_dict = {}
    if utils.getqtype(question) == "who":
        names_dict = utils.get_names(urls, question)
    if utils.getqtype(question) == "when":
        names_dict = utils.get_dates(urls)

    # Pick the most frequently seen candidate as the answer.
    answer = None
    number = 0
    for i in names_dict:
        if names_dict[i] > number:
            answer = i
            number = names_dict[i]

    return render_template("search.html",
                           question=question,
                           urls=urls,
                           names_dict=names_dict,
                           answer=answer)

class TwitterSpider(scrapy.Spider):
    name = 'tspider'
    allowed_domains = ['twitter.com']
    start_urls = utils.get_urls(filename='en_sent.csv', index=0)

    def parse(self, response):
        # get soup from response
        soup = BeautifulSoup(response.body, 'lxml')

        # get text from soup
        tweet_text = soup.find('div', {
            'class': 'js-tweet-text-container'
        }).find('p').text

        # get tweet id from url
        tweet_id = soup.find('div', {
            'class': 'permalink-tweet'
        }).get('data-item-id')

        # return data
        yield {'tid': tweet_id, 'text': tweet_text}

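# The scrapy snippets above call utils.get_urls(filename=..., index=...) to
# load start URLs from a file rather than extracting them from text. That
# helper is not shown; below is a minimal sketch under the assumption that it
# reads one URL per line from a plain-text file, or one column of a CSV file
# when index is given.
import csv

def get_urls(filename, index=None):
    """Read URLs from `filename`; `index` selects a CSV column if given."""
    with open(filename, newline='') as f:
        if index is None:
            return [line.strip() for line in f if line.strip()]
        return [row[index] for row in csv.reader(f) if row]
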
def get_urls(text):
    """ get urls from message text """
    m = 'oc.tc/'
    # The generic URL extractor is assumed to live in another module (calling
    # get_urls here would recurse forever, as the original snippet did);
    # extract_urls is a stand-in name for it.
    return filter(lambda x: m in x and not x.endswith('s/'), extract_urls(text))

import sys
import utils
import os


def get_issue_names(seq):
    """Here seq will have all elements from which we can get issues."""
    res = []
    for i in seq:
        issue = os.path.join('/', '/'.join(i.lstrip('/').split('/', 2)[:2]))
        if issue not in res:
            res.append(issue)
    return list(set(res))


issues = sys.argv[1:]
if not issues:
    site = "http://kottapalli.in"
    main_page = utils.read_page(site)
    urls = utils.get_urls(main_page)
    issues = get_issue_names(utils.get_similar_strs(urls, r'/\d{4}/\d{2}'))
else:
    # Filter the given urls for our need.
    issues = utils.get_similar_strs(issues, r'/\d{4}/\d{2}$')

print(' '.join(issues))

def filter_without_url(self):
    # Keep only statuses that contain no URLs.
    urls = get_urls(self.status.text)
    if len(urls) == 0:
        return True
    return False