Example #1
import json
from collections import Counter

from graphviz import Digraph

from crawler import Crawler  # project-local module; the import path is assumed


class LinkMiner:
    def __init__(self, sources: list, targets: list):
        self.data = None
        self.crawler = Crawler(sources=sources, targets=targets)
        self.graph = Digraph(strict=True, engine='circo')
        self.graph.graph_attr['overlap'] = 'false'

    def extract(self):
        # Crawl the sources, then build the link graph from the results.
        self.data = self.crawler.run()
        # Count occurrences so frequently linked nodes render larger.
        nodes = Counter(self.data['nodes'])
        top = max(nodes.values())
        for node in nodes:
            # Scale node and font size with frequency, flooring both at a
            # quarter of the most frequent node's count.
            size = str(max(nodes[node], top // 4))
            self.graph.node(node, node, size=size, fontsize=size)
        for edge in self.data['edges']:
            self.graph.edge(edge['source'], edge['target'])

    def render(self, filename='untitled'):
        # Write the Graphviz source to <filename>.gv and open the rendered view.
        self.graph.render(f'{filename}.gv', view=True)

    def export_json(self, filename):
        # Dump the raw edge list for use outside Graphviz.
        with open(f'{filename}.json', 'w') as file:
            file.write(json.dumps(self.data['edges']))
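
A minimal usage sketch: the seed URLs below are placeholders, and it assumes
Crawler.run() returns a dict with a 'nodes' list (repeats included) and an
'edges' list of {'source', 'target'} mappings, as the code above implies.

miner = LinkMiner(sources=['https://example.com'], targets=['example.org'])
miner.extract()                  # crawl and build the graph in memory
miner.render('link_graph')       # writes link_graph.gv and opens the result
miner.export_json('link_graph')  # writes link_graph.json with the edge list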
Example #2
import argparse
from warnings import filterwarnings

import pymysql

from src.crawler import Crawler
from src.feeder import Feeder

# Silence pymysql warnings raised while feeding the database.
filterwarnings('ignore', category=pymysql.Warning)

parser = argparse.ArgumentParser(description="Download all Pokemon Showdown stats files and load their stats into a database.")
parser.add_argument("dbms", help="Database Management System", choices=["mysql"])
parser.add_argument("host", help="Database address")
parser.add_argument("user", help="Database user")
parser.add_argument("password", help="User password")
parser.add_argument("dbname", help="Database name")
group = parser.add_mutually_exclusive_group()
group.add_argument("-p", "--only-parse", "--skip-download", help="do not download any file from the internet and only use available local files to build the database", action="store_true")
# group.add_argument("-d", "--only-download", "--skip-parse", help="do not parse and do not store any file in a database, and only download files from the internet", action="store_true")
parser.add_argument("-F", "--folder", help="folder to use to download files into, and to parse from")
parser.add_argument("-f", "--file", help="only process a single specific file")
parser.add_argument("-v", "--verbose", help="be verbose", action="store_true")
args = parser.parse_args()

# Phase 1: Download
print(args)  # echo the parsed arguments for debugging
if not args.only_parse:
    crawler = Crawler('')
    crawler.run()

# Phase 2: Parse
feeder = Feeder('stats')
feeder.feedAll(args.dbms, args.host, args.user, args.password, args.dbname)
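
Assuming the script above is saved as feed.py (the filename is not given and is
only illustrative), a typical run that downloads the stats files and then loads
them into MySQL looks like:

python feed.py mysql localhost showdown_user secret showdown_stats -v

Passing -p/--only-parse instead skips the download phase and rebuilds the
database from files already present locally.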