def get_infected_times(filename, meme):
    """Map each domain that quoted `meme` to a pair
    [time the domain first quoted the meme,
     time it was last linked to by another quoting entry]."""
    lines = []
    domain_times = {}
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                lines.append(line)
            else:
                # Blank line: the buffered lines form one complete entry.
                entry = Entry(lines)
                lines = []

                for quote in entry.quotes:
                    if meme in quote:
                        entry_domain = entry.get_base_url()
                        # Record the first time this domain quoted the meme.
                        if entry_domain not in domain_times:
                            domain_times[entry_domain] = [entry.timestamp, None]

                        # Any already-infected domain this entry links to was
                        # last referenced at this entry's timestamp.
                        for link in entry.links:
                            link_domain = base_url(link)
                            if (link_domain != entry_domain
                                    and link_domain in domain_times):
                                domain_times[link_domain][1] = entry.timestamp
                        break  # one matching quote per entry is enough

    return domain_times
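
A minimal usage sketch, assuming a blank-line-separated quotes file (here a hypothetical 'quotes.txt') and that the Entry class and base_url helper used above are available; the meme string is illustrative:

    times = get_infected_times('quotes.txt', 'yes we can')
    for domain, (first_quoted, last_linked) in sorted(times.items()):
        # last_linked stays None if no later quoting entry linked back to this domain
        print(domain, first_quoted, last_linked)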
Example #2
import collections


def count_base_urls(file):
    """Count how many entries in `file` come from each base URL."""
    lines = []
    base_urls = collections.defaultdict(int)
    with open(file, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                lines.append(line)
            else:
                # Blank line: the buffered lines form one complete entry.
                entry = Entry(lines)
                lines = []
                # Update the per-domain entry count.
                base_urls[entry.get_base_url()] += 1

    return base_urls
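
A similar hedged usage sketch for the counter, again assuming a hypothetical 'quotes.txt' in the same blank-line-separated entry format:

    counts = count_base_urls('quotes.txt')
    # Print the ten most frequent base URLs and their entry counts.
    for domain, n in sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:10]:
        print(domain, n)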