Example #1
def syncCache():
    cache1 = Cache(GIT_DIR)
    cache1.start()

    cache2 = Cache(GIT_DIR)
    cache2.initial()

    # copy every path that cache2 lists but cache1 does not yet contain
    for path in cache2.list():
        if not cache1.contains(path):
            cache1.update(path)
            if not isdir(join(CC_DIR, path.file)):
                copy(path.file)
    cache1.write()
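
The snippet above assumes a Cache class and the GIT_DIR / CC_DIR constants defined elsewhere in the project. As a rough illustration of the same pattern, here is a minimal, self-contained sketch with a hypothetical in-memory Cache stub that only mimics the calls the example relies on:

# Minimal sketch of the two-cache sync pattern above; the Cache stub and the
# copy callback are hypothetical stand-ins, not the project's real classes.
class Cache:
    def __init__(self, root):
        self.root = root
        self.paths = set()

    def load(self, paths):
        # stand-in for start()/initial(), which populate the cache elsewhere
        self.paths.update(paths)

    def list(self):
        return sorted(self.paths)

    def contains(self, path):
        return path in self.paths

    def update(self, path):
        self.paths.add(path)


def sync(cache1, cache2, copy):
    # copy everything cache2 knows about that cache1 does not
    for path in cache2.list():
        if not cache1.contains(path):
            cache1.update(path)
            copy(path)


if __name__ == '__main__':
    a, b = Cache('a'), Cache('b')
    b.load(['src/main.py', 'README.md'])
    sync(a, b, copy=lambda p: print('copying', p))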
Example #2
class DNS(BaseServer):
    # caching DNS resolver
    def __init__(self, port, forwarder):
        super(DNS, self).__init__(port)
        self._forwarder = forwarder
        self._forwarder_corrupted = False
        self._cache = Cache()
        self._lock = Lock()

    def _client_req_handler(self, addr, packet):
        self._client = addr
        if not self._forwarder_corrupted:
            # if the request came back to us, mark the forwarder as corrupted
            if addr[0] == self._forwarder[0]:
                self._forwarder_corrupted = True
                print(
                    '\n[-] Forwarder is corrupted (the request was returned).\n'
                    '[!] Please shut down the server and specify another forwarder'
                )
                self._return_server_resp(self._make_error_packet(packet))
        else:
            self._return_server_resp(self._make_error_packet(packet))

        question = self._get_question(packet)
        qname = get_qname(question)
        # question type
        qtype = struct.unpack('>H',
                              question[question.find(b'\x00') + 1:][:2])[0]
        from_cache = False
        response = b''

        if self._cache.contains(qname, qtype):
            with self._lock:
                response, from_cache = self._cache.get(qname, qtype,
                                                       packet[:2]), True
        if response in [b'', None]:
            response, from_cache = self._request_to_forwarder(
                qname, qtype, packet), False

        if not self._forwarder_corrupted:
            print("\n" + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                  end=" ")
            print("-- [*] {} {} {}".format(addr[0], TYPES[qtype], qname),
                  end=" ")
            print('cache' if from_cache else 'forwarder')
            self._return_server_resp(response)

    # get the QUESTION section
    def _get_question(self, packet):
        spacket = packet[12:]
        return spacket[:spacket.find(b'\x00') + 5]

    # send the server's response
    def _return_server_resp(self, packet):
        self._sock.sendto(packet, self._client)

    # build an error packet
    def _make_error_packet(self, packet):
        flags = '1' + set_padding(bin(packet[2])[2:])[1:]
        rcode = set_padding(bin(packet[3])[2:])

        return packet[:2] + struct.pack('>H', int(flags + rcode[:4] + '0010',
                                                  2)) + packet[4:]

    # request to the forwarder
    def _request_to_forwarder(self, qname, qtype, packet):
        if packet is None:
            return
        with self._lock:
            error = False
            sock = self._make_socket()
            try:
                sock.sendto(packet, self._forwarder)
                npacket, addr = sock.recvfrom(BUFFER_SIZE)
            except socket.error:
                self._return_server_resp(self._make_error_packet(packet))
                # bail out here: npacket was never assigned on a socket error
                return None
            finally:
                sock.close()
            question = self._get_question(npacket)
            qnames = self._cache.push(qname, qtype, question, npacket)
            Thread(target=self.cache_inner_fields, args=(qnames, )).start()
            return npacket

    def _check_if_query(self, packet):
        return set_padding(bin(packet[3])[2:])[0] == '0'

    # cache the data
    def cache_inner_fields(self, qnames):
        for qname in qnames:
            if qname in [None, '']:
                continue
            for qtype in self._cache.used_qtypes:
                self._request_to_forwarder(
                    qname, qtype, self.create_dns_request(qname, qtype))

    # build a DNS request
    def create_dns_request(self, name, _type):
        with self._lock:
            name = name.encode()
            # pack the values as big-endian binary data
            id = struct.pack('>H', randint(MIN_PORT, MAX_PORT))
            flags = b'\x01\x20'
            question = b'\x00\x01'
            answer = b'\x00\x00'
            authority = b'\x00\x00'
            addit = b'\x00\x00'

            qname = b''
            for part in name.split(b'.'):
                qname += struct.pack('B', len(part)) + part
            qtype = struct.pack('>H', _type)
            qclass = b'\x00\x01'
            return id + flags + question + answer + authority + addit + qname + qtype + qclass
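
create_dns_request above assembles a raw DNS query by hand (MIN_PORT and MAX_PORT are constants defined elsewhere and simply bound the random transaction id). A standalone sketch of the same wire format, written with a single struct call for the 12-byte header and with the terminating zero-length label added explicitly, might look like this:

import struct
from random import randint


def build_dns_query(name, qtype=1):
    # Sketch of a minimal DNS query packet (qtype 1 = A record).
    header = struct.pack(
        '>HHHHHH',
        randint(0, 0xFFFF),  # transaction id
        0x0120,              # flags: RD=1, AD=1, as in the example above
        1,                   # QDCOUNT: one question
        0, 0, 0)             # ANCOUNT, NSCOUNT, ARCOUNT
    qname = b''.join(
        struct.pack('B', len(label)) + label.encode()
        for label in name.rstrip('.').split('.'))
    qname += b'\x00'         # terminating zero-length label
    return header + qname + struct.pack('>HH', qtype, 1)  # QTYPE, QCLASS=IN


# e.g. print(build_dns_query('example.com').hex())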
Example #3
class DNS(BaseServer):
    ''' Cached DNS resolver '''
    def __init__(self, port, forwarder):
        super(DNS, self).__init__(port)
        self._forwarder = forwarder
        self._forwarder_corrupted = False
        self._cache = Cache()
        self._lock = Lock()

    def _client_req_handler(self, addr, packet):
        print("In client request handler")
        self._client = addr

        if not self._forwarder_corrupted:
            if addr[0] == self._forwarder[0]:
                self._forwarder_corrupted = True
                print(CYCLE_MESSAGE)
                self._return_server_resp(self._make_error_packet(packet))
        else:
            self._return_server_resp(self._make_error_packet(packet))

        question = self._get_question(packet)
        qname = get_qname(question)
        qtype = struct.unpack('>H',
                              question[question.find(b'\x00') + 1:][:2])[0]

        from_cache = False
        response = b''

        if self._cache.contains(qname, qtype):
            with self._lock:
                response, from_cache = self._cache.get(qname, qtype,
                                                       packet[:2]), True
        if response in [b'', None]:
            response, from_cache = self._make_request2forwarder(
                qname, qtype, packet), False

        if not self._forwarder_corrupted:
            print("\n" + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                  end=" ")
            print("-- [*] {} {} {}".format(addr[0], TYPES[qtype], qname),
                  end=" ")
            print('cache' if from_cache else 'forwarder')
            self._return_server_resp(response)

    def _get_question(self, packet):
        spacket = packet[12:]

        return spacket[:spacket.find(b'\x00') + 5]

    def _return_server_resp(self, packet):

        self._sock.sendto(packet, self._client)

    def _make_error_packet(self, packet):
        flags = '1' + set_padding(bin(packet[2])[2:])[1:]
        rcode = set_padding(bin(packet[3])[2:])

        return packet[:2] + struct.pack('>H', int(flags + rcode[:4] + '0010',
                                                  2)) + packet[4:]

    def _make_request2forwarder(self, qname, qtype, packet):
        if packet is None:
            return

        with self._lock:
            error = False
            sock = self._make_socket()
            try:
                sock.sendto(packet, self._forwarder)
                npacket, addr = sock.recvfrom(BUFFER_SIZE)
            except socket.error:
                self._return_server_resp(self._make_error_packet(packet))
                # bail out here: npacket was never assigned on a socket error
                return None
            finally:
                sock.close()
            question = self._get_question(npacket)
            qnames = self._cache.push(qname, qtype, question, npacket)

            if qtype == 1:
                ns_packet = self.switch_a_to_ns(packet, qname)

                sock = self._make_socket()
                try:
                    sock.sendto(ns_packet, self._forwarder)
                    nepacket, addr = sock.recvfrom(BUFFER_SIZE)
                except socket.error:
                    self._return_server_resp(self._make_error_packet(packet))
                    # skip the extra NS caching if the lookup failed
                    return npacket
                finally:
                    sock.close()
                question = self._get_question(nepacket)
                qnames = self._cache.push(qname, 2, question, nepacket)
                for auth_qname in qnames:
                    ns_packet = self.replace_qname(packet, qname, auth_qname)
                    sock = self._make_socket()
                    try:
                        sock.sendto(ns_packet, self._forwarder)
                        nepacket, addr = sock.recvfrom(BUFFER_SIZE)
                    except socket.error:
                        self._return_server_resp(
                            self._make_error_packet(packet))
                        # nepacket was not received for this auth_qname; skip it
                        continue
                    finally:
                        sock.close()

                    question = self._get_question(nepacket)
                    self._cache.push(auth_qname, 1, question, nepacket)

            # Thread(target=self.cache_inner_fields, args=(qnames,)).start()

            return npacket

    def replace_qname(self, packet, qname, new_qname):
        f_part = packet[:packet.find(bytes(qname.split('.')[0], 'utf8')) - 1]
        l_part = packet[packet.find(bytes(qname.split('.')[-2], 'utf8')) +
                        len(qname.split('.')[-2]) + 1:]

        qwe = b''
        try:
            for w in new_qname.split('.'):
                qwe += struct.pack('>B', len(w)) + bytes(w, 'utf8')
        except Exception:
            import sys
            print(sys.exc_info())
        return f_part + qwe + l_part

    def switch_a_to_ns(self, packet, qname):
        qname = qname.split('.')[-2]
        i = packet.find(bytes(qname, 'utf8')) + len(qname) + 2
        packet = packet[:i] + b'\x02' + packet[i + 1:]
        return packet

    def _check_if_query(self, packet):
        return set_padding(bin(packet[3])[2:])[0] == '0'

    def cache_inner_fields(self, qnames):
        for qname in qnames:
            if qname in [None, '']:
                continue
            for qtype in self._cache.used_qtypes:
                self._make_request2forwarder(
                    qname, qtype, self.create_dns_request(qname, qtype))

    def create_dns_request(self, name, _type):
        with self._lock:
            name = name.encode()

            id = struct.pack('>H', randint(MIN_VALUE, MAX_VALUE))
            flags = b'\x01\x20'
            question = b'\x00\x01'
            answer = b'\x00\x00'
            authority = b'\x00\x00'
            addit = b'\x00\x00'

            qname = b''
            for part in name.split(b'.'):
                qname += struct.pack('B', len(part)) + part
            qtype = struct.pack('>H', _type)
            qclass = b'\x00\x01'
            return id + flags + question + answer + authority + addit + qname + qtype + qclass
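
_make_error_packet builds the response flags by converting the two flag bytes to binary strings, setting the QR bit and forcing RCODE to 2 (SERVFAIL). Assuming set_padding zero-pads to eight bits, the same result can be sketched with plain bitwise operations:

def make_error_packet(packet):
    # Equivalent sketch: set QR=1 in the high flags byte and force RCODE=2
    # (SERVFAIL) in the low one, leaving the other bits untouched.
    hi = packet[2] | 0x80
    lo = (packet[3] & 0xF0) | 0x02
    return packet[:2] + bytes([hi, lo]) + packet[4:]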
Example #4
class Bing_Search(object):
    def __init__(self, api_key, data_dir=None):
        self.cache = None
        if data_dir:
            cache_file = data_dir + "/bing.json"
            self.cache = Cache(cache_file)
        self.stopext = set([".pdf", ".doc", ".xls"])
        self.headers = {'Ocp-Apim-Subscription-Key': api_key}

    def is_valid(self, url):
        if len(url)<4 or url[-4:] in self.stopext:
            return False
        return True

    def search(self, query_term, count=10):
        """
        Reference: https://docs.microsoft.com/en-us/rest/api/cognitiveservices/bing-web-api-v5-reference#query-parameters
        Args:
            count: The number of search results to return. If count is greater than 50, results are fetched in pages, since each query returns at most 50 results.
        """
        if self.cache and self.cache.contains(query_term):
            urls = self.cache.get(query_term)
            return [url for url in urls if self.is_valid(url)]
        urls = []
        offset = 0

        while count>0:
            params = urllib.urlencode({
                # Request parameters
                'q': query_term,
                'count': str(min(count, 50)),
                'offset': str(offset),
                'mkt': 'en-us',
                'safesearch': 'Moderate'})

            try:
                conn = httplib.HTTPSConnection('api.cognitive.microsoft.com')
                #conn.request("GET", "/bing/v5.0/search?%s" % params, "{body}", headers)
                conn.request("GET", "/bing/v7.0/search?%s" % params, "{body}", self.headers)
                response = conn.getresponse()
                data = response.read()
                obj = json.loads(data)
                if 'webPages' in obj:
                    webPages = obj['webPages']
                    values = webPages['value']
                    for value in values:
                        if self.is_valid(value['url']):
                            url = URLUtility.normalize(value['url'])
                            if url:
                                urls.append(url)
                conn.close()
            except:
                traceback.print_exc()

            # Bing's 'offset' is the number of results to skip, so advance by a full page
            offset += min(count, 50)
            count -= 50

        if self.cache:
            self.cache.add(query_term, urls)
        return urls

    def search_site(self, keyword, url, k=10):
        """
        Search inside a given website using the search command: "keyword site:url"
        Parameters
            keyword: keyword used to search
            url: top level domain
        Returns 
            list of urls
        """
        keyword = keyword + " site:" + url
        return self.search(keyword, k)
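
For reference, a hypothetical usage of the Bing_Search wrapper above (the API key and data directory are placeholders; the class itself targets Python 2, since it relies on urllib.urlencode and httplib):

searcher = Bing_Search('<your-bing-api-key>', data_dir='./data')

# plain keyword search, paging automatically past the 50-result limit
urls = searcher.search('open source web crawler', count=60)

# restrict the same query to a single site
site_urls = searcher.search_site('crawler', 'github.com', k=10)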
Example #5
class Search_APIs(object):
    def __init__(self, data_dir, fetcher):
        google_api_key = ""
        if not google_api_key:
            print "Error! google_api_key is missing"
            sys.exit(1)
        google_cse_id = ""  # Google custome search engine id
        if not google_cse_id:
            print "Error! google_cse_id is missing"
            sys.exit(1)

        self.google = Google_Search(google_api_key, google_cse_id)
        self.google_delay = 1  # 5QPS limit: https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits

        bing_api_key = ""
        if not bing_api_key:
            print "Error! bing_api_key is missing"
            sys.exit(1)
        self.bing = Bing_Search(bing_api_key)
        self.bing_delay = 1

        # Setting cache for related search
        related_cache_file = data_dir + "/related_search.json"
        self.related_cache = Cache(related_cache_file)
        print "Loaded ", self.related_cache.length(
        ), " queries from related search cache"

        # Setting cache for backlink search
        access_id = ""
        if not access_id:
            print "Error! access_id is missing"
            sys.exit(1)
        secret_key = ""
        if not secret_key:
            print "Error! secret_key is missing"
            sys.exit(1)
        self.moz = Moz_Search(access_id, secret_key)
        backlink_cache_file = data_dir + "/backlink_search.json"
        self.backlink_cache = Cache(backlink_cache_file)
        print "Loaded ", self.backlink_cache.length(
        ), " queries from backlink search cache"
        self.moz_delay = 1

        # Setting cache for keyword search
        keyword_cache_file = data_dir + "/keyword_search.json"
        self.keyword_cache = Cache(keyword_cache_file)
        print "Loaded ", self.keyword_cache.length(
        ), " queries from keyword search cache"

        # Setting cache for forward search
        #self.fetcher = Fetcher(data_dir, "/forward_search.json")
        self.fetcher = fetcher
        self.link_extractor = Link_Extractor()

        self.k = 10  # Number of keywords selected in each extraction
        self.max_urls = 10  # maximum number of urls to extract from each pages
        self.keywords = set()  # Keywords extracted from relevant sites

    def set_max_keywords(self, max_kw):
        self.k = max_kw

    def _extract_keywords(self, sites, k=10):
        """
        Extract top k most frequent keywords. Skip ones that were selected.
        """
        stop = stopwords.words('english')
        counter = Counter()
        for site in sites:
            for p in site:
                text = p.get_text('meta')
                text = URLUtility.clean_text(text)
                words = nltk.word_tokenize(text)
                words = [
                    word for word in words
                    if word not in stop and len(word) > 2
                ]
                bigram_words = [
                    words[i] + ' ' + words[i + 1]
                    for i in xrange(len(words) - 1)
                ]
                counter += Counter(words + bigram_words)

        # Get the topk words
        """
        counter = [(counter[w], w) for w in counter if counter[w]>1] # convert to array
        heapq.heapify(counter)
        topk = heapq.nlargest(k, counter)
        return [w[1] for w in topk]
        """
        top_words = counter.most_common(k + len(self.keywords))
        result = []  # list of keywords to return
        i = 0
        while len(result) < k and i < len(top_words):
            if top_words[i][0] not in self.keywords:
                result.append(top_words[i][0])
                self.keywords.add(top_words[i][0])
            i += 1
        print "    List of selected keywords: ", result
        return result

    def search(self, sites, searchop, seed_keyword="", max_results=50):
        """
        Args:
            max_results: Maximum number of results to return in Bing/Google search
            searchop: str - one of 'rl', 'kw', 'fw', 'bl'
        """
        #sites = self.fetcher.fetch_sites(urls)

        results = set()
        if searchop == 'rl':
            for w in sites:
                print "    Running related search..."
                urls = self.search_related(w.get_host(), max_results)
                results.update(urls)

        elif searchop == 'bl':
            """
            for w in sites:
                print "    Search backlinks..."
                urls = self.search_backward_forward(w.get_host())
                results.update(urls)
            """
            urls = self.search_backward_forward_batch(sites)
            results.update(urls)

        elif searchop == 'fw':
            #urls = [w.get_url() for w in sites]
            print "    Forward search...", len(sites), " urls"
            urls = self.search_forward_sites(sites)
            results.update(urls)

        # Run keyword search
        elif searchop == 'kw':
            print "    Searching by keyword"
            keywords = self._extract_keywords(sites, self.k)
            for keyword in keywords:
                if seed_keyword:
                    keyword = seed_keyword + ' ' + keyword
                urls = self.search_keywords(keyword, max_results, se='bing')
                results.update(urls)

        print "    Found ", len(results), " urls"
        return results

    def search_backward_forward(self, url):
        """
        Search related pages using backlink search and forward search

        Returns:
            - list of urls (potentially duplicated)
        """
        t = time.time()
        backlinks = self.search_backward(url)
        print "Backlink search time: ", time.time() - t
        t = time.time()
        fwlinks = self.search_forward(backlinks)
        print "Forward search time: ", time.time() - t
        return backlinks + fwlinks

    def search_backward_forward_batch(self, sites):
        """
        Search related pages using backlink search and forward search

        Parameters:
            - sites: list of Website objects
        Returns:
            - list of urls (potentially duplicated)
        """
        t = time.time()
        backlinks = set()
        for site in sites:
            backlinks.update(self.search_backward(site.get_host()))
        backlinks = list(backlinks)
        print "Backlink search time: ", time.time() - t

        t = time.time()
        fwlinks = self.search_forward(backlinks)
        print "Forward search time: ", time.time() - t
        return backlinks + fwlinks

    def search_backward(self, url):
        """
        Search backlinks using MOZ APIs

        Returns:
            - list of urls 
        """
        if self.backlink_cache.contains(url):
            results = self.backlink_cache.get(url)
            print "hit backlink query: ", url
        else:
            #time.sleep(self.moz_delay)
            results = self.moz.search_backlinks(url)
            self.backlink_cache.add(url, results)

        print "Backlink Search - Query: ", url, " - Number of results: ", len(
            results)
        return results

    def search_keywords(self, keyword, max_results, se='google'):
        """
        Search relevant pages by keyword using Google

        Args:
        max_results: maximum number of results to return

        """
        urls = []
        if self.keyword_cache.contains(keyword):
            urls = self.keyword_cache.get(keyword)
            print "hit keyword query: ", keyword
        else:
            if se == 'google':
                time.sleep(self.google_delay)
                urls = self.google.search(keyword, max_results)
            else:  # default: 'bing'
                time.sleep(self.bing_delay)
                urls = self.bing.search(keyword, max_results)
            self.keyword_cache.add(keyword, urls)
        """
        if 'items' in results:
            for item in results['items']:
                urls.append(url_normalize(item['link']))
        """

        print "Keyword Search - Query: ", keyword, " - Number of results: ", len(
            urls)
        return urls

    def search_forward_sites(self, sites, insite=False):
        """
        Fetch the pages and extract external links. 
        Args
            - sites: list of Website objects
            - insite: False if extracting links outside the host.
        """
        outlinks = set()
        for site in sites:
            for page in site:
                if insite:
                    links = self.link_extractor.extract_insite_links(
                        page.get_url(), page.get_html())
                else:
                    links = self.link_extractor.extract_external_links(
                        page.get_url(), page.get_html())
                links = self.select_subset(links)
                outlinks.update(links)

        print "Forward Search ", " - Number of results: ", len(outlinks)
        return list(outlinks)

    def search_forward(self, urls, insite=False):
        """
        Fetch the pages and extract external links
        Args
            - urls: list of urls
            - insite: False if extracting links outside the host.
        """
        sites = self.fetcher.fetch_sites(urls, allow_fetch_later=True)
        outlinks = set()
        for site in sites:
            for page in site:
                if insite:
                    links = self.link_extractor.extract_insite_links(
                        page.get_url(), page.get_html())
                else:
                    links = self.link_extractor.extract_external_links(
                        page.get_url(), page.get_html())
                links = self.select_subset(links)
                outlinks.update(links)

        print "Forward Search ", " - Number of results: ", len(outlinks)
        return list(outlinks)

    def select_subset(self, urls):
        """
        Each page might contain thousands of external urls that pollute the results, so we only keep a fixed number of links from each page.
        How this works:
            - Pick one url in each site  
            - If not yet reaching max, select random urls
        Returns:
            - list of urls
        """
        if len(urls) <= self.max_urls:
            return urls

        results = []
        """
        cur = urls
        while len(results)<self.max_urls:
            sites = set()
            next = []
            for url in cur: 
                site = URLUtility.get_host(url)
                if site not in sites:
                    sites.add(site)
                    results.append(url)
                else:
                    next.append(url)
                if len(results) == self.max_urls:
                    break
            cur = next
        """
        sites = set()
        for url in urls:
            site = URLUtility.get_host(url)
            if site not in sites:
                sites.add(site)
                results.append(url)
            if len(results) == self.max_urls:
                break

        return results

    def search_related(self, url, k):
        """
        Return list of related urls using Google related search
        """
        query = "related:" + url
        urls = []
        if self.related_cache.contains(query):
            urls = self.related_cache.get(query)
            print "hit related query: ", query
        else:
            time.sleep(self.google_delay)
            urls = self.google.search(query, k)
            self.related_cache.add(query, urls)
        """
        urls = []
        if 'items' in results:
            for item in results['items']:
                urls.append(url_normalize(item['link']))
        """

        print "Related Search - Query: ", url, " - Number of results: ", len(
            urls)
        return urls
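
_extract_keywords counts unigrams and adjacent-word bigrams across the fetched pages and keeps the top k terms that were not selected before. A self-contained sketch of that counting step, with a plain whitespace tokenizer instead of nltk and a hypothetical stop-word set:

from collections import Counter


def top_keywords(texts, k=10, already_selected=None, stop=frozenset()):
    # Count unigrams and adjacent bigrams, then return the k most common
    # terms that have not been selected yet.
    already_selected = set(already_selected or [])
    counter = Counter()
    for text in texts:
        words = [w for w in text.lower().split()
                 if w not in stop and len(w) > 2]
        bigrams = [words[i] + ' ' + words[i + 1]
                   for i in range(len(words) - 1)]
        counter += Counter(words + bigrams)

    result = []
    for term, _ in counter.most_common(k + len(already_selected)):
        if term not in already_selected:
            result.append(term)
            already_selected.add(term)
        if len(result) == k:
            break
    return result


# e.g. top_keywords(['open source web crawler', 'focused web crawler'], k=3)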
Example #6
class NewsSentiment:
    __positiveList = []
    __negativeList = []
    __stopwords = []

    def __init__(self):
        self.__cache = Cache("news-sentiment.json")
        self.__cityCache = Cache("city-cache.json")

        with open("positive.txt") as pFile:
            self.__positiveList = pFile.read().lower().split()

        with open("negative.txt") as nFile:
            self.__negativeList = nFile.read().lower().split()

        with open("stopwords.txt") as sFile:
            self.__stopwords = sFile.read().lower().split()

        self.__key = "4e28e4b30b954544b5d808b4d54b37a4"

        self.__positiveCount = 0
        self.__negativeCount = 0
        self.__stopwordCount = 0

        with open("news-id.json") as nifile:
            self.__news_id = json.load(nifile)
            self.__news_id_str = ','.join(map(str, self.__news_id))

    def __find(self, needle, haystack):
        found = []
        h_len = len(haystack)
        n_len = len(needle)

        for i in range(0, h_len):
            str_part = ""
            innerLen = 0
            if i + n_len <= h_len:
                innerLen = i + n_len
            else:
                break

            for j in range(i, innerLen):
                str_part += haystack[j]

            h1 = hash(needle + "_salt") % 1997
            h2 = hash(str_part + "_salt") % 1997

            if h1 == h2:
                found.append(i)

        return found

    def calculate_polarity(self, word):
        polarity = 0
        parts = word.split()
        p = 0
        n = 0
        s = 0
        for part in parts:
            part = part.strip().lower()
            if part in self.__positiveList:
                polarity += 1
                p += 1
            elif part in self.__negativeList:
                polarity -= 1
                n += 1
            elif part in self.__stopwords:
                s += 1

        return polarity, p, n, s

    def prefetch_news_city(self, cities):
        for city in cities:
            self.fetch_news_sentiment(city)

    def fetch_news_sentiment(self, country):
        country = country.lower()
        api_id = ','.join(self.__news_id)
        print("Analyzing news for ", country, "...")
        sentiments = []

        if not self.__cityCache.contains(country):
            url = "https://newsapi.org/v2/everything?q=" + country + "&sources=" + api_id + "&apiKey=" + self.__key
            country_news = requests.get(url).json()
            self.__cityCache.set(
                country, {
                    "articles": country_news["articles"],
                    "totalResults": country_news["totalResults"]
                })

        cache_country = self.__cityCache.get(country)
        articles = cache_country["articles"]
        res_count = cache_country["totalResults"]

        _len = 0
        if res_count > 6:
            _len = 6
        else:
            _len = res_count

        for i in range(0, _len):
            article = articles[i]
            title = article["title"]
            url = article["url"]

            key = url

            if not self.__cache.contains(key):
                try:
                    news = Article(url)
                    news.download()
                    news.parse()

                    title_score, tp, tn, ts = self.calculate_polarity(title)
                    news_score, np, nn, ns = self.calculate_polarity(news.text)

                    sentiment = {
                        "title": title_score,
                        "news": news_score,
                        "total": (title_score + news_score),
                        "stats": {
                            "p": tp + np,
                            "n": np + nn,
                            "s": ts + ns
                        }
                    }
                    self.__cache.set(key, sentiment)
                except Exception as ex:
                    print("Unable to fetch news for ", country, "! Msg: ", ex)
                    # skip this article; nothing was cached for it
                    continue

            sentiment = self.__cache.get(key)
            self.__positiveCount += sentiment["stats"]["p"]
            self.__negativeCount += sentiment["stats"]["n"]
            self.__stopwordCount += sentiment["stats"]["s"]

            sentiments.append(sentiment)

        return sentiments

    def showWordStatistics(self):
        sentiment_plotter.plotResults(self.__positiveCount,
                                      self.__negativeCount,
                                      self.__stopwordCount)
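
calculate_polarity above is a simple counting scheme: +1 for each positive word, -1 for each negative word, plus a separate stop-word tally. A standalone sketch with small inline word lists standing in for positive.txt, negative.txt and stopwords.txt:

POSITIVE = {'good', 'great', 'win'}
NEGATIVE = {'bad', 'loss', 'crisis'}
STOPWORDS = {'the', 'a', 'of', 'after'}


def calculate_polarity(text):
    # Returns (polarity, positive hits, negative hits, stop-word hits).
    polarity = pos = neg = stop = 0
    for part in text.split():
        part = part.strip().lower()
        if part in POSITIVE:
            polarity += 1
            pos += 1
        elif part in NEGATIVE:
            polarity -= 1
            neg += 1
        elif part in STOPWORDS:
            stop += 1
    return polarity, pos, neg, stop


# e.g. calculate_polarity('a good win after the crisis')  ->  (1, 2, 1, 3)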
Example #7
NewsSentiment().prefetch_news_city(cities)

print("Plotting cities on map...")
if not os.path.exists("static\\city_on_map.html"):
    mark_cities.plot_map_with_marked_cities(cities, "static\\city_on_map.html")

print("Building graph for cities...")
graphCities = MapGraph()

print("Getting coordinates for each cities...")

for city in cities:
    print("Acquiring coordinates for ", city, "...")
    k_city = city.replace(" ", "_").lower()

    if city_coord.contains(k_city):
        print("Coordinate exist!")
    else:
        coordinate = city_mapping.convertCityToCoordinates(city)
        print(city, " is at ", coordinate)
        city_coord.set(k_city, coordinate)

for fromCity in cities:
    for toCity in cities:
        k_from = fromCity.replace(" ", "_").lower()
        k_to = toCity.replace(" ", "_").lower()

        distance = None
        if fromCity == toCity:
            print("Same origin and destination. Distance = 0")
            distance = 0