Esempio n. 1
0
 def __create_cloud__(self, dir_name):
     """
     Create the cloud database file and its ``segments_data`` table.

     Nothing happens when a regular file already exists at ``dir_name``.

     :param dir_name: path of the database file (despite the name it is
         tested with ``os.path.isfile`` and opened as a file).
     """
     if os.path.isfile(dir_name):
         return

     table_str = ''' CREATE TABLE segments_data(id INTEGER PRIMARY KEY, segment_name TEXT,
                     password TEXT, users_premissions TEXT)'''
     # The empty file is still created before the table, exactly as before;
     # the context manager guarantees the handle is released even when
     # db.create_table raises.
     with open(dir_name, 'w'):
         db.create_table(dir_name, table_str)
Esempio n. 2
0
def create_db(dir_name, admin_username, admin_password, admin_directory):
    """
    Create the users database and register the admin account in it.

    Nothing happens when a regular file already exists at ``dir_name``.

    :param dir_name: path of the database file (tested with
        ``os.path.isfile`` and opened as a file).
    :param admin_username: user name passed to ``db.register``.
    :param admin_password: plain password; it is passed through ``hash``
        before being handed to ``db.register``.
    :param admin_directory: last argument passed to ``db.register``.
    """
    if os.path.isfile(dir_name):
        return

    table_str = ''' CREATE TABLE users(id INTEGER PRIMARY KEY,
                     username TEXT, password TEXT, ip TEXT , isActive INTEGER , premissions TEXT, directory_folder TEXT)'''
    # The empty file is still created before the table, exactly as before;
    # the context manager guarantees the handle is released even when
    # db.create_table raises.
    with open(dir_name, 'w'):
        db.create_table(dir_name, table_str)

    # NOTE(review): if `hash` here is the builtin, str hashes are salted per
    # interpreter process on Python 3, so the stored value would not match a
    # hash computed in a later run -- confirm which `hash` is in scope.
    db.register(dir_name, admin_username, hash(admin_password), 'ALL',
                MAIN_HOST, admin_directory)
Esempio n. 3
0
def dfs_spider(url, word, max_pages):
    """
    Crawl depth-first from ``url`` looking for pages that contain ``word``.

    For every page whose data contains ``word`` the page details are stored
    in the ``'PageDetails'`` table through ``add_item`` and the first five
    entries returned by ``get_words`` are printed. Progress and errors are
    printed to stdout; the function returns nothing.

    :param url: page the crawl starts from.
    :param word: keyword searched for with ``data.find``.
    :param max_pages: maximum number of pages to attempt; a page that fails
        to load still counts towards this limit.
    """
    # Stack of discovered pages still to visit. The top of the stack is the
    # END of the list so push/pop are O(1); the visit order is the same as
    # inserting and popping at index 0.
    pages_to_visit = [url]
    # Every URL ever pushed on the stack, so no page is queued twice.
    pages_visited = {url}
    number_of_pages_visited = 0
    found_word = False

    table_name = 'PageDetails'
    create_table(table_name)
    while number_of_pages_visited < max_pages and pages_to_visit:
        number_of_pages_visited += 1
        url = pages_to_visit.pop()

        try:
            print("---------------------------")
            print(" ")
            print(number_of_pages_visited, "Visiting:", url)
            crawler = WebCrawler()
            data, links = crawler.get_links(url)
            if data.find(word) > -1:
                num_of_occurrences = sum(1 for _ in find_all_occurrences(data, word))
                found_word = True
                words = get_words(url)
                page_details = get_page_details(url, word, num_of_occurrences, links)
                add_item(page_details, table_name)
                print(" ")
                print("Found the keyword!")
                print(" ")
                print("The most relevant keywords in the page are: ")
                print(words[:5])

            for link in links:
                if link not in pages_visited:
                    pages_visited.add(link)
                    pages_to_visit.append(link)
        except Exception as e:
            # Best-effort crawl: a page that cannot be fetched or parsed is
            # reported and skipped so the remaining pages are still visited.
            print(str(e))

    print("---------------------------")
    print(" ")
    if not found_word:
        print("Keyword not found...")
Esempio n. 4
0
  G = nx.Graph()

  G.add_node(word)
  #urls = response(['Items'])
  # print(response)
  for url in response['Items']:
    G.add_node(url['url'])
    G.add_edge(word,url['url'],weight = url['word_count'])

  nx.draw(G,with_labels=True)
  plt.savefig("chart.png")
  plt.show()

if __name__ == '__main__':
  # Manual check: store two sample pages for the same word, then draw the
  # chart for that word.
  table_name = 'PageDetails'
  db.create_table(table_name)

  item = {
    'url': 'url1',
    'word': 'word1',
    'word_count': 15,
    'hyperlinks': ['hard', 'to', 'combine'],
  }
  db.add_item(item, table_name)

  # The url of the second item is one of the hyperlinks of the first one.
  item2 = {
    'url': 'hard',
    'word': 'word1',
    'word_count': 16,
    'hyperlinks': ['blabla', 'to', 'combine'],
  }
  db.add_item(item2, table_name)

  draw_chart("word1", 2)
Esempio n. 5
0
 def __cloud_add_segment_(self, table_str):
     """
     Add a table to this cloud's database.

     Passes ``self.cloud_dir`` and the given ``table_str`` statement on to
     ``db.create_table``.
     """
     cloud_path = self.cloud_dir
     db.create_table(cloud_path, table_str)