def get_infected_times(filename, meme):
    """Collect, per domain, when ``meme`` was first quoted and last linked to.

    The file is read as a sequence of records separated by blank lines. The
    stripped, non-empty lines of each record are handed to ``Entry``. Only
    entries with at least one quote containing ``meme`` contribute.

    Args:
        filename: Path of the file to read.
        meme: Value tested with ``meme in quote`` against each entry quote.

    Returns:
        A dict mapping a base URL to a two-item list
        ``[first_timestamp, linked_timestamp]``:

        * ``first_timestamp`` is ``entry.timestamp`` of the first entry (in
          file order) from that domain that quotes ``meme``.
        * ``linked_timestamp`` is ``entry.timestamp`` of the most recent
          entry (in file order) from a *different* domain that quotes
          ``meme`` and links to this domain, or ``None`` if there is none.
    """
    domain_times = {}

    def record(entry_lines):
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; links are only examined for entries that quote the meme.
        entry = Entry(entry_lines)
        if not any(meme in quote for quote in entry.quotes):
            return
        entry_domain = entry.get_base_url()
        if entry_domain not in domain_times:
            domain_times[entry_domain] = [entry.timestamp, None]
        for link in entry.links:
            link_domain = base_url(link)
            # Self-links are ignored; only already-seen domains are updated.
            if link_domain != entry_domain and link_domain in domain_times:
                domain_times[link_domain][1] = entry.timestamp

    pending = []
    with open(filename, 'r') as f:
        for raw_line in f:
            stripped = raw_line.strip()
            if stripped:
                pending.append(stripped)
            else:
                # A blank line closes the current record.
                record(pending)
                pending = []

    # The last record is not necessarily followed by a blank line; without
    # this flush it would be silently dropped.
    if pending:
        record(pending)

    return domain_times
def count_base_urls(file):
    """Count how many entries in a file share each base URL.

    The file is read as a sequence of records separated by blank lines. The
    stripped, non-empty lines of each record are handed to ``Entry`` and the
    counter for ``entry.get_base_url()`` is incremented.

    Args:
        file: Path of the file to read.

    Returns:
        A ``collections.defaultdict(int)`` mapping base URL to the number of
        entries that produced it.
    """
    base_urls = collections.defaultdict(int)
    pending = []
    with open(file, 'r') as f:
        for raw_line in f:
            stripped = raw_line.strip()
            if stripped:
                pending.append(stripped)
                continue
            # A blank line closes the current record.
            base_urls[Entry(pending).get_base_url()] += 1
            pending = []

    # The last record is not necessarily followed by a blank line; without
    # this flush it would not be counted.
    if pending:
        base_urls[Entry(pending).get_base_url()] += 1

    return base_urls