Ejemplo n.º 1
0
class Crawler(object):
    """Crawl a news listing and collect the articles it links to.

    The ``descriptor`` supplies everything site-specific: ``descriptor.newsline``
    is iterated to obtain listing-page URLs and its ``parse`` method turns a
    listing page's source into items exposing ``pubdate``, ``url`` and
    ``title``; ``descriptor.parse_article`` turns an article page's source
    into an object exposing ``body``.
    """

    def __init__(self, descriptor):
        """Keep the descriptor and create the client used to persist results."""
        self.descriptor = descriptor
        self.file_client = FileClient()

    def crawl(self, stop_dated):
        """Collect articles until one older than ``stop_dated`` is met.

        Args:
            stop_dated: cut-off value compared against each item's
                ``pubdate``; crawling stops at the first item for which
                ``stop_dated > item.pubdate``.

        Returns:
            A dict mapping each article URL to a dict with the keys
            ``'title'``, ``'pubdate'`` and ``'body'``. The same dict is
            handed to ``self.file_client.append`` before returning.
        """
        print('starting to crawl')
        crawled_dict = {}
        for url in self.descriptor.newsline:
            print('navigating to url %s' % url)

            page_source_code = requests.get(url).text
            for item in self.descriptor.newsline.parse(page_source_code):
                print(item.pubdate)
                if stop_dated > item.pubdate:
                    # First item older than the cut-off: stop crawling here.
                    return self._persist(crawled_dict)

                article_source_code = requests.get(item.url).text
                article = self.descriptor.parse_article(article_source_code)

                print(item.title)
                crawled_dict[item.url] = {
                    'title': item.title,
                    'pubdate': item.pubdate,
                    'body': article.body
                }

        # The listing ran out before the cut-off was reached. Persist and
        # return what was gathered instead of silently returning None.
        return self._persist(crawled_dict)

    def _persist(self, crawled_dict):
        """Hand the collected articles to the file client and return them."""
        self.file_client.append(crawled_dict)
        return crawled_dict
Ejemplo n.º 2
0
def delete(file_name):
    """Delete ``file_name`` locally and from every file server that holds it.

    Returns:
        ``True`` once the delete request has been sent to every address the
        directory server listed; otherwise one of the message strings
        ``"Can't delete it! "``, ``"No such file!"`` or ``"Unknown Error"``.
    """
    delete_local(file_name)

    # Ask the lock server whether the file is currently locked.
    with grpc.insecure_channel('%s:%s' % (LOCK_IP, LOCK_POT)) as lock_channel:
        # lock_exist_test result: 0 = no lock, 1 = shared lock, 2 = exclusive lock
        lock_state = LockClient(lock_channel).lock_exist_test(file_name)
        if lock_state != 0:
            return "Can't delete it! "

    # Ask the directory server where the file lives, then drop its entry.
    with grpc.insecure_channel(
            '%s:%s' % (DIRECTORY_IP, DIRECTORY_PORT)) as directory_channel:
        directory = DirectoryClient(directory_channel)
        found_name, _ip, _port, _timestamp, _host_id = \
            directory.get_file_address(file_name)
        if found_name == '':
            return "No such file!"
        replicas = directory.get_file_all_address(file_name)
        if not directory.del_file_address(file_name, time.time()):
            return "Unknown Error"

    # Contact each file server in turn and delete the file there.
    for host, replica_port in replicas:
        with grpc.insecure_channel(
                '%s:%s' % (host, replica_port)) as file_channel:
            FileClient(file_channel).delete(file_name)

    return True
Ejemplo n.º 3
0
def read(file_name):
    """Read ``file_name`` from its file server while holding a shared lock.

    The shared lock is always released once it has been acquired, including
    when the directory server does not know the file or when a lookup/read
    call raises.

    Returns:
        Whatever the file server's ``read`` returns on success; otherwise one
        of the message strings
        ``"Can't access this file, another client is writing!"``,
        ``"No such file"`` or ``"Unknown Error"`` (locking or unlocking
        was refused).
    """
    lock_address = str(LOCK_IP) + ':' + str(LOCK_POT)

    # Ask the lock server whether the file is exclusively locked.
    with grpc.insecure_channel(lock_address) as channel:
        lock_client = LockClient(channel)
        # lock_exist_test result: 0 = no lock, 1 = shared lock, 2 = exclusive lock
        if lock_client.lock_exist_test(file_name) == 2:
            return "Can't access this file, another client is writing!"
        # lock_file mode: 0 = shared lock, 1 = exclusive lock
        if not lock_client.lock_file(file_name, 0):
            return "Unknown Error"

    found = False
    result_str = ''
    try:
        # Ask the directory server for the ip/port that hold the file.
        with grpc.insecure_channel(str(DIRECTORY_IP) + ':' +
                                   str(DIRECTORY_PORT)) as channel:
            directory_client = DirectoryClient(channel)
            stored_name, ip, port, _timestamp, _host_id = \
                directory_client.get_file_address(file_name)

        if stored_name != '':
            found = True
            # Contact the file server and read the file.
            with grpc.insecure_channel(str(ip) + ':' + str(port)) as channel:
                result_str = FileClient(channel).read(stored_name)
    finally:
        # Release the shared lock under the name it was acquired with, on
        # every exit path (mode 0 = shared lock, 1 = exclusive lock).
        with grpc.insecure_channel(lock_address) as channel:
            released = LockClient(channel).unlock_file(file_name, 0)

    if not found:
        return "No such file"
    if not released:
        return "Unknown Error"
    return result_str
Ejemplo n.º 4
0
def upload(file_name):
    """Upload a local file to every file server.

    Uses an instant-consistency communication model: the file is written to
    all file servers in one pass, and each successful write is reported to
    the directory server.

    Returns:
        ``False`` when the directory server already knows ``file_name``
        (uploading over an existing file is not allowed), ``True`` otherwise.
    """
    directory_address = '%s:%s' % (DIRECTORY_IP, DIRECTORY_PORT)

    # Refuse the upload if the file is already registered.
    with grpc.insecure_channel(directory_address) as lookup_channel:
        existing_name, _ip, _port, _timestamp, _host_id = \
            DirectoryClient(lookup_channel).get_file_address(file_name)
        if existing_name:
            return False

    local_path = os.path.join(ROOT_PATH, file_name)
    with open(local_path, 'r', encoding='utf-8') as source:
        contents = source.read()

        # Fetch the addresses of all file servers.
        with grpc.insecure_channel(directory_address) as servers_channel:
            server_list = DirectoryClient(servers_channel).get_all_server()

        # Write the file to every file server.
        for host, server_port in server_list:
            with grpc.insecure_channel(
                    '%s:%s' % (host, server_port)) as file_channel:
                if not FileClient(file_channel).upload(file_name, contents):
                    print("Unknown Error from", host, server_port)
                    continue

                # Report the successful write back to the directory server.
                with grpc.insecure_channel(
                        directory_address) as report_channel:
                    reporter = DirectoryClient(report_channel)
                    written_at = time.time()
                    reporter.add_file_address(
                        file_name, host, server_port, written_at)
        return True
Ejemplo n.º 5
0
 def __init__(self):
     """Load ``self.items`` from a freshly created ``FileClient``."""
     self.items = FileClient().read()
Ejemplo n.º 6
0
    # Initialise logging with the output target given on the command line.
    Logger.init_log(args.output)

    # Choose the database client from the command-line flags; the first
    # flag that is set wins.
    if args.sqlite:
        db_client = SqliteClient()
        db_client.connect(path=args.dbpath)
    elif args.mysql:
        db_client = MysqlClient()
        db_client.connect(host=args.dbaddr,
                          port=int(args.dbport),
                          user=args.dbuser,
                          pwd=args.dbpwd,
                          schema=args.dbschema)
    elif args.csv:
        # Pass the directory through only when one was supplied; otherwise
        # FileClient is created with its own defaults.
        if args.dbdir != '':
            db_client = FileClient(dir=args.dbdir)
        else:
            db_client = FileClient()
    elif args.kdb:
        db_client = KdbPlusClient()
        db_client.connect(host=args.dbaddr, port=int(args.dbport))
    else:
        # No backend flag was given: report it, show usage, exit with status 1.
        print('Error: Please define which database is used.')
        parser.print_help()
        sys.exit(1)

    # Subscription instruments
    # A missing or empty instrument list is reported as an error.
    if args.instmts is None or len(args.instmts) == 0:
        print(
            'Error: Please define the instrument subscription list. You can refer to subscriptions.ini.'
        )
 def setUpClass(cls):
     """Initialise logging and attach a connected ``FileClient`` to the class."""
     Logger.init_log()
     client = FileClient(dir=path)
     cls.db_client = client
     client.connect()
Ejemplo n.º 8
0
 def __init__(self, descriptor):
     """Keep the given descriptor and create a ``FileClient`` for this instance."""
     self.descriptor = descriptor
     self.file_client = FileClient()