Ejemplo n.º 1
0
    def new_commit(self, content, pdfbits):
        """
        Store a newly committed paper described by [content] and [pdfbits].
        The pdf is written into the papers folder, then every keyword and
        every description of the paper is registered.
        inputs:
            content: Mapping holding the 'title', 'keywords', 'timestamp'
                     and 'descriptions' of the paper.
            pdfbits: Content of the pdf file, written in binary mode.
        """
        # Reading every field up front raises KeyError before anything is
        # written when a required field is missing
        title, keywords, timestamp, descriptions = [
            content[field]
            for field in ('title', 'keywords', 'timestamp', 'descriptions')
        ]

        # Store the pdf under the file name derived from the title
        pdf_name = self._title2fname_(title)
        pdf_path = os.path.join(self.papers_dir, pdf_name)
        with open(pdf_path, 'wb') as pdf_file:
            pdf_file.write(pdfbits)

        # Register the keywords one by one
        for keyword in keywords:
            print(keyword)
            self.insert_keyword(keyword, title, timestamp)

        # Register the descriptions one by one,
        # [descriptions] is indexed by the name of each description
        for desc_name in descriptions:
            print(desc_name)
            self.insert_description(desc_name, descriptions[desc_name], title)

        logger.info(f'PAPERS_SERVER received new commit of {title}')
Ejemplo n.º 2
0
 def update(self):
     """
     Rescan the buffer folder and rebuild the table of pdf files.
     yield:
         self.pdfs: Table indexed by file name with the columns
                    'path', 'atime', 'ctime' and 'mtime',
                    times are formatted as '%Y-%m-%d %H:%M:%S' in local time.
     """
     self.read_ignores()
     table = pd.DataFrame()

     # Collect every pdf file of the buffer folder.
     # NOTE: the ignores are read above but names are not filtered by them here.
     names = []
     paths = []
     for fname in os.listdir(self.buffer_dir):
         if not fname.endswith('.pdf'):
             continue
         names.append(fname)
         paths.append(os.path.join(self.buffer_dir, fname))
     table['name'] = names
     table['path'] = paths

     def readable(seconds):
         # Format a timestamp as local time text
         return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(seconds))

     # One column per kind of file time
     time_readers = {
         'atime': os.path.getatime,
         'ctime': os.path.getctime,
         'mtime': os.path.getmtime,
     }
     for column, reader in time_readers.items():
         table[column] = [readable(reader(path)) for path in paths]

     # File names become the index of the table
     self.pdfs = table.set_index('name', drop=True)
     logger.info('Buffered file names updated.')
Ejemplo n.º 3
0
 def __init__(self):
     """
     Set up the papers server.
     The papers folder is taken from profiles.papers_dir, the keywords and
     descriptions files are located inside it and both are read at once.
     """
     papers_dir = profiles.papers_dir
     self.papers_dir = papers_dir
     # Both storage files sit next to the papers
     self.keywords_path = os.path.join(papers_dir, 'keywords.json')
     self.descriptions_path = os.path.join(papers_dir, 'descriptions.xlsx')
     # Load what is already stored
     self.read_keywords()
     self.read_descriptions()
     logger.info('PAPERS_SERVER started.')
Ejemplo n.º 4
0
 def new_ignore(self, name):
     """
     Add [name] to the ignores and refresh this buffer.
     inputs:
         name: File name to be ignored.
     yield:
         When [name] is not ignored yet, self.ignores gets a new row and is
         written to self.ignores_path.
         self.update() is called in any case.
     """
     if name not in self.ignores.name.values:
         # DataFrame.append was removed in pandas 2.0,
         # pd.concat gives the same table on old and new pandas versions
         new_row = pd.DataFrame([{'name': name}])
         self.ignores = pd.concat([self.ignores, new_row],
                                  ignore_index=True)
         self.ignores.to_json(self.ignores_path)
     logger.info(f'Ignore name: {name}')
     self.update()
Ejemplo n.º 5
0
 def get_by_name(self, name):
     """ Fetch the buffered pdf called [name]
     input:
         name: Name of the pdf, it has to be in the index of self.pdfs
     output:
         fpath: Path to the pdf file
         info: Info entry of the pdf as given by PdfReader
         bits: Content of the pdf file as bytes """
     # The paper has to be known by the buffer
     assert name in self.pdfs.index
     # Locate the file and read its pdf info
     fpath = self.pdfs['path'][name]
     info = PdfReader(fpath).Info
     # Read the whole file at once
     with open(fpath, 'rb') as pdf_file:
         bits = pdf_file.read()
     logger.info(f'BUFFER_SERVER get_by_name success on {name}')
     return fpath, info, bits
Ejemplo n.º 6
0
 def buffer_get(self, name, method='open'):
     """
     Fetch the file [name] from buffer_server, [method] is 'open' or 'start'.
         method: 'open' returns the content of the file
                 'start' hands the path of the file to os.system
                         and returns None
     Any failure is logged and None is returned.
     """
     try:
         assert method in ('start', 'open')
         fpath, _info, bits = self.buffer_server.get_by_name(name)
         result = None
         if method == 'open':
             result = bits
         elif method == 'start':
             logger.info(f'WORKER buffer_get starts {name}')
             # NOTE(review): the bare path is run as a shell command, this
             # presumably relies on the system opening the file with its
             # default app - confirm, also for paths containing spaces
             os.system(fpath)
         return result
     except Exception as e:
         logger.error(f'WORKER buffer_get failed: {e}')
         return None
Ejemplo n.º 7
0
    def buffer_commit(self, name, content):
        """ Commit the buffered pdf [name] together with its [content].
        The raw [content] is parsed, the pdf is fetched from the buffer, both
        are handed to papers_server and [name] is then ignored by buffer_server.
        Return 0 if success,
        return 1 if failed. """
        # Step 1: parse [content] into the fields of the commit
        try:
            # Commit timestamp, 'date' is divided by 1000
            # (presumably milliseconds to seconds - to be confirmed)
            timestamp = float(content['date']) / 1000
            # Title of the paper
            title = content['title']
            # Keywords of the paper: comma separated, stripped,
            # title-cased, blank pieces are dropped
            keywords = []
            for piece in content['keywords'].split(','):
                keyword = piece.strip()
                if keyword:
                    keywords.append(keyword.title())
            # Descriptions of the paper
            descriptions = self._description2dict_(content['descriptions'])
            new_content = {
                'timestamp': timestamp,
                'title': title,
                'keywords': keywords,
                'descriptions': descriptions,
            }
            logger.info('WORKER buffer_commit parsed content')
            print(new_content)
        except Exception as e:
            logger.error(
                f'WORKER buffer_commit failed on parsing content: {content}, error: {e}'
            )
            return 1

        # Step 2: fetch the pdf, None means the buffer could not provide it
        pdfbits = self.buffer_get(name)
        if pdfbits is None:
            logger.error(
                f'WORKER buffer_commit failed on getting pdf file {name}')
            return 1

        # Step 3: store the paper, then ignore its name in the buffer
        try:
            self.papers_server.new_commit(new_content, pdfbits)
            self.buffer_server.new_ignore(name)
            logger.info(f'WORKER buffer_commit committed {new_content}.')
        except Exception as e:
            logger.error(
                f'Worker buffer_commit failed on committing content: {new_content}, error: {e}'
            )
            return 1
        return 0
Ejemplo n.º 8
0
    def get_by_title(self, title, fields):
        """ Collect what is stored about the paper [title], limited to [fields].
        inputs:
            title: Title of the paper
            fields: Names of the entries to be returned
        outputs:
            A dict holding the entries named in [fields] among
            fpath: Path to the pdf file
            bits: Content of the pdf file as bytes
            keywords: Keywords that are not NA
            descriptions: Descriptions that are not NA """
        # Locate the pdf file from the title
        fname = self._title2fname_(title)
        fpath = os.path.join(self.papers_dir, fname)

        contents = {}
        if 'fpath' in fields:
            contents['fpath'] = fpath

        # The paper has to be known everywhere:
        # in the descriptions, in the keywords and as a pdf file
        assert title in self.descriptions.index
        assert title in self.keywords.index
        assert os.path.exists(fpath)

        # Descriptions and keywords are read the same way,
        # only the values that are not NA are kept
        for field in ('descriptions', 'keywords'):
            if field in fields:
                row = getattr(self, field).loc[title]
                contents[field] = row[row.notna()]

        # Content of the pdf file, read at once
        if 'bits' in fields:
            with open(fpath, 'rb') as pdf_file:
                contents['bits'] = pdf_file.read()

        logger.info(f'PAPERS_SERVER get_by_title success on {title}')
        return contents
Ejemplo n.º 9
0
    def run(self, ip='localhost', port=8612):
        """ Run socket listening on [ip]:[port].
        Every accepted connection is served by self.serve_connection in a
        thread of its own. The loop goes on while self.running is true, the
        flag is only checked between two connections since accept() blocks.
        inputs:
            ip: Address the socket is bound to.
            port: Port the socket listens on. """
        # The with statement closes the listening socket when the loop ends
        # and when bind, listen or accept raises
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind((ip, port))
            sock.listen(1)
            logger.info(f'WEBSERVER listen on {ip}:{port}')
            # Serving
            idx = 0
            while self.running:
                # Accept new connection
                connection, client_address = sock.accept()
                logger.info(
                    f'WEBSERVER is connected {connection} from {client_address}.')
                # Start new thread to serve the connection
                t = threading.Thread(target=self.serve_connection,
                                     args=(connection, client_address, idx))
                t.start()
                # idx wraps around to stay small in the logs
                idx = (idx + 1) % 65536

        logger.info("WEBSERVER stopped.")
Ejemplo n.º 10
0
 def serve_connection(self, connection, address, idx=None):
     """
     Method to serve [connection] of [idx] from [address].
     One request is read, answered by self.respond and the connection is
     closed, whatever happens in between.
     inputs:
         connection: Connected socket to read from and to answer on.
         address: Address of the client, only used in the error log.
         idx: Index of the connection, only used in the logs.
     """
     try:
         # Fetch request
         request = connection.recv(65536).decode()
         length = len(request)
         logger.info(f'WEBSERVER-{idx} receives {length} bits')
         # Respond, text is encoded before being sent
         content = self.respond(request)
         if not isinstance(content, bytes):
             content = content.encode()
         length = len(content)
         logger.info(f'WEBSERVER-{idx} responses {length} bits')
         connection.sendall(content)
         # idx is left untouched: incrementing it here raised TypeError with
         # the default idx=None and made the closing log report a wrong index
     except Exception as e:
         logger.error(
             f'WEBSERVER runtime error. connection={connection}, client_address={address}, error={e}'
         )
     finally:
         connection.close()
         logger.info(f'WEBSERVER-{idx} connection closed')
Ejemplo n.º 11
0
 def __init__(self,
              buffer_server=None,
              papers_server=None):
     """
     Set up the worker on top of a buffer server and a papers server.
     inputs:
         buffer_server: Server of the buffered pdfs,
                        a new BUFFER_SERVER is created when None.
         papers_server: Server of the committed papers,
                        a new PAPERS_SERVER is created when None.
     """
     # Default servers are created here instead of in the signature:
     # a default in the signature is evaluated once, when the method is
     # defined, so the servers would start at import time and be shared
     # by every worker
     if buffer_server is None:
         buffer_server = BUFFER_SERVER()
     if papers_server is None:
         papers_server = PAPERS_SERVER()
     self.buffer_server = buffer_server
     self.papers_server = papers_server
     logger.info('WORKER initialized.')
Ejemplo n.º 12
0
 def __init__(self):
     """
     Set up the buffer server.
     The buffer folder is taken from profiles.buffer_dir, the ignores file
     is located inside it and the buffered files are scanned at once.
     """
     buffer_dir = profiles.buffer_dir
     self.buffer_dir = buffer_dir
     self.ignores_path = os.path.join(buffer_dir, 'ignores.json')
     # First scan of the buffer folder
     self.update()
     logger.info('BUFFER_SERVER started.')
Ejemplo n.º 13
0
 def __init__(self, ip='localhost', port=8612):
     """
     Prepare the web server.
     [ip] and [port] are accepted but they are not used by this method.
     yield:
         self.running: Set to True.
     """
     self.running = True
     logger.info('WEBSERVER initialized.')