Example #1
import re

from Publication import Publication  # assumed import path


def book_from_xml(xml):
    authors = re.findall('<author>(.+?)</author>', xml)
    title = re.search('<title>(.+?)</title>', xml)

    # title
    if title is not None:
        title = title.group(1)
    else:
        title = ""

    # year
    year = re.search('<year>(.+?)</year>', xml)
    if year is not None:
        year = year.group(1)
    else:
        year = ""

    # link
    link = re.search('<ee>(.+?)</ee>', xml)
    if link is not None:
        link = link.group(1)
    else:
        link = ""

    book = Publication(title, authors, year, link)

    # publisher (added as a contributor); it may be absent
    publisher = re.search('<journal>(.+?)</journal>', xml)
    if publisher is not None:
        publisher = publisher.group(1)
        book.add_contributor(publisher)

    book.print_info()
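
A minimal usage sketch for the function above (the XML fragment is hypothetical; Publication, add_contributor, and print_info come from the surrounding project):

xml = ('<author>A. Author</author><author>B. Author</author>'
       '<title>An Example Title</title>'
       '<year>2003</year>'
       '<ee>https://example.org/paper.pdf</ee>'
       '<journal>Example Journal</journal>')
book_from_xml(xml)  # prints the parsed fields, with the journal as a contributor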
Example #2
    def __str__(self):
        import binascii
        from Publication import Publication

        general_flags_str = 'General flags: '
        if self.is_deadman_switch_file():
            general_flags_str += 'Deadman Switch File'
        elif self.is_deadman_switch_key():
            general_flags_str += 'Deadman Switch Key'
        else:
            general_flags_str += 'None'

        s = ''
        if self.temporal_key is not None:
            s = "Temporal Key: %s\n" % binascii.hexlify(
                self.temporal_key).decode('ascii')

        return "PartialFile:\n\tInitial TXID: %s\n\tSanitized filename: %s\n\tDescription: %s\n\tFile size: %d\n\tEncryption type: %s\n\tContent type: %s\n\tCompression type: %s\n\t%s\n\tFile hash: %s\n\tFile pointer: %d\n\tACK Window: %s\n\t%s\n\tInitial block number: %d\n\tFinal block number: %d\n\tSQL ID: %d\n\tIs deadman switch file: %s\n\tIs deadman switch key: %s\n\tIs complete deadman switch file: %r\n\tIs complete: %r\n" % (
            self.initial_txid, self.sanitized_filename, self.description,
            self.file_size, Publication.get_encryption_str(
                self.encryption_type),
            Publication.get_content_type_str(self.content_type),
            Publication.get_compression_type_str(
                self.compression_type), general_flags_str,
            binascii.hexlify(self.file_hash).decode('ascii'), self.file_ptr,
            self.block_acks, s,
            self.initial_block_num, self.final_block_num, self.sql_id,
            self.is_deadman_switch_file(), self.is_deadman_switch_key(),
            self.is_complete_deadman_switch_file(), self.is_complete())
Example #3
 def __init__(self, title, publisher, status, created_by, category, type,
              synopsis, author, isbnno):
     Publication.__init__(self, title, publisher, status, created_by,
                          category, type)
     self.__synopsis = synopsis
     self.__author = author
     self.__isbnno = isbnno
Example #4
 def search_single_pub(self, paper_title: str) -> Publication:
     """Search by scholar query and return a single Publication object"""
     url = self.URLS('PUBSEARCH').format(requests.utils.quote(paper_title))
     soup = self._get_soup(self.URLS('HOST').format(url))
     self.__URLS['PUBLIB'] = soup.find('div',
                                       id='gs_res_glb').get('data-sva')
     return Publication(soup.find_all('div', 'gs_or')[0], self, 'scholar')
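
Usage might look like this (searcher is a hypothetical instance of the surrounding scraper class):

pub = searcher.search_single_pub('A Relational Model of Data for Large Shared Data Banks')
print(pub)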
Example #5
import re


def getDataFromEntry(f, line):
    firstBlock, line = getBlock(f, line)
    if 'RETRACTED ARTICLE' in firstBlock:
        return None
    parsedDateVector = re.findall(
        r'((19|20)[0-9]{2}\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))',
        firstBlock)
    if len(parsedDateVector) == 0:
        parsedDateVector = re.findall(r'((19|20)[0-9]{2})', firstBlock)
    if len(parsedDateVector) == 0:
        title, abstract, date, authors, pmid = getDataFromNonJournalEntry(
            f, line, firstBlock)
    else:
        title, abstract, date, authors, pmid = getDataFromJournalEntry(
            f, line, firstBlock)

    cleanedAuthors = re.sub('[0-9()]', '', authors).split(', ')
    parsedDateVector = re.findall(
        r'((19|20)[0-9]{2}\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))',
        date)
    if len(parsedDateVector) == 0:
        parsedDateVector = re.findall(r'((19|20)[0-9]{2})', date)
    if len(parsedDateVector) == 0:
        print('Error: No date found for paper', date)
        parsedDate = None
    else:
        parsedDate = parsedDateVector[0][0]
    return Publication(title, abstract, parsedDate, cleanedAuthors, pmid)
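
Why parsedDateVector[0][0]: when a pattern contains several groups, re.findall returns a list of group tuples, and the outermost group holds the full date match. A quick illustration:

import re

matches = re.findall(
    r'((19|20)[0-9]{2}\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))',
    'Epub 2003 Mar 15.')
print(matches)        # [('2003 Mar', '20', 'Mar')]
print(matches[0][0])  # '2003 Mar'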
Example #6
 def _search_scholar_soup(self, soup):
     """Generator that returns Publication objects from the search page"""
     while True:
         for row in soup.find_all('div', 'gs_or'):
             yield Publication(row, self, 'scholar')
         if soup.find(class_='gs_ico gs_ico_nav_next'):
             url = soup.find(class_='gs_ico gs_ico_nav_next').parent['href']
             soup = self._get_soup(self.URLS('HOST').format(url))
         else:
             break
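
A short usage sketch (assuming searcher is an instance of the surrounding class and soup is the first parsed result page):

import itertools

# Take the first ten results without walking every result page.
for pub in itertools.islice(searcher._search_scholar_soup(soup), 10):
    print(pub)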
Example #7
    def get_book_from_block(self, block):
        title = re.search('<h3>.*?<a.*?>(.*?)</a>', block)
        if title is not None:
            title = title.group(1)
        else:
            title = ""

        authors = re.findall('class="author-name-tooltip".*?>(.*?)</a>', block)
        for i, auth in enumerate(authors):
            authors[i] = re.sub('<.*?>', '', auth)

        year = re.search(r'class="conference">.*?<span.*? (\d{4}).*?</span>', block)
        if year is not None:
            year = year.group(1)
        else:
            year = ""

        link = re.search('<h3>.*?<a.*?href="(.*?)">', block)
        if link is not None:
            link = self.base_href + link.group(1)
        else:
            link = ""


        book = Publication(title, authors, year, link)

        desc = re.search('class="abstract">.*?<span.*?>(.*?)</span>.*?class="conference">.*?<span', block)
        if desc is not None:
            desc = desc.group(1)
            book.add_description(desc)

        publisher = re.search('class="conference">.*?</span><a.*?>(.*?)</a>', block)
        if publisher is not None:
            publisher = publisher.group(1)
            book.add_publisher(publisher)

        book.print_info()
Example #8
    def finalize(self, temporal_key, block_num):
        from Publication import Publication
        from Utils import Utils

        # If not all bytes were received, this is a failure.
        if (not self.is_complete()) and (
                not self.is_complete_deadman_switch_file()):
            self.d("Cannot finalize because file is not complete!")
            return False

        # Update the temporal key, if there is one.
        if (self.encryption_type != Publication.ENCRYPTION_TYPE_NONE) and \
           (temporal_key != (b'\x00' * 32)):
            self.temporal_key = temporal_key

        # If file is in plaintext, the hash is in the temporal key field.
        if self.encryption_type == Publication.ENCRYPTION_TYPE_NONE:
            self.file_hash = temporal_key
            self.temporal_key = b'\x00' * 32

        # Read the file we extracted.
        file_bytes = None
        with open(self.file_path, 'rb') as f:
            file_bytes = f.read()

        # Calculate the hash of the file we extracted.
        calculated_hash = hashlib.sha256(file_bytes).digest()

        # Check that the hash in the publication header matches what we have.
        if self.file_hash != calculated_hash:
            self.d("Hashes do not match!:\n%s\n%s" %
                   (binascii.hexlify(self.file_hash).decode('ascii'),
                    binascii.hexlify(calculated_hash).decode('ascii')))
            return False

        # If this file is a deadman switch, don't try to decrypt, since we don't
        # have the real key here.
        if self.is_deadman_switch_file() and (temporal_key == (b'\xff' * 32)):
            # Save the num_parallel_txs and encryption_type so that when the key
            # is found in the future, we know how to decrypt this.
            self.final_block_num = block_num
            self.save_state()
            return True

        # Get a unique filename in the output directory.
        new_file_path = PartialFile.get_unique_filepath(
            self.initial_txid, self.output_dir, self.sanitized_filename)

        # Decrypt the file, if necessary.
        if self.encryption_type == Publication.ENCRYPTION_TYPE_GPG2_AES256_SHA512:
            self.d("File is encrypted with type %s.  Decrypting..." %
                   Publication.get_encryption_str(self.encryption_type))
            file_bytes = Utils.decrypt(file_bytes, self.temporal_key)
            if len(file_bytes) == 0:
                self.d("Decryption of file yielded zero bytes!")
                return False

            # Write the plaintext bytes into the output directory.
            with open(new_file_path, 'wb') as f:
                f.write(file_bytes)

            # Remove the encrypted file.
            try:
                os.unlink(self.file_path)
            except FileNotFoundError:
                pass

        else:
            # Move file out of partial directory into output directory.
            os.rename(self.file_path, new_file_path)

        # Update the file_path with its final destination.
        self.file_path = new_file_path

        # Delete the state file.
        try:
            os.unlink(self.state_file)
        except FileNotFoundError:
            pass

        # Update the final block number.
        self.final_block_num = block_num

        # Mark as finalized and return success.
        self.finalized = True
        return True
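
A hedged usage sketch (partial_file is a hypothetical instance; finalize returns False on any integrity failure, as shown above):

key = b'\x00' * 31 + b'\x01'  # hypothetical 32-byte temporal key
if partial_file.finalize(key, block_num=123456):
    print("Finalized to %s" % partial_file.file_path)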
Example #9
    def get_estimate(rpc_client, filepath, chain, num_outputs, num_concurrent_transactions, estimate_with_fee):
        from Publication import Publication

        cost = 0.0
        time = None
        ntransactions = 0
        size = None

        # No fee rate was given, so try to get it from the network.
        if (estimate_with_fee is None) or (estimate_with_fee < 0.0):
            print("Getting fee estimate from network...")
            estimate_with_fee = rpc_client.estimatefee(1)
            if estimate_with_fee <= 0.0:
                print("Error: could not get fee estimate from network.  Specify fee manually with --txfee argument.")
                sys.exit(-1)
            else:
                print("Found fee estimate: %f" % estimate_with_fee)


        nbytes = os.stat(filepath).st_size
        if nbytes > 1073741824:
            size = "%s GB" % format(nbytes / 1073741824, '2.1f')
        elif nbytes > 1048576:
            size = "%s MB" % format(nbytes / 1048576, '2.1f')
        elif nbytes > 1024:
            size = "%s KB" % format(nbytes / 1024, '2.1f')
        else:
            size = "%d bytes" % nbytes


        total_num_transactions = math.ceil(nbytes / (num_outputs * Publication.SINGLE_OUTPUT_SIZE))
        num_block_generations = math.ceil(total_num_transactions / num_concurrent_transactions)

        # Another three blocks/transactions are needed for the header,
        # termination, and change transactions.
        num_block_generations += 3
        total_num_transactions += 3

        # For multi-transaction publications, there's a NOOP transaction at the
        # beginning and at the end.
        if num_concurrent_transactions > 1:
            num_block_generations += 2
            total_num_transactions += 2


        # Notes from observation:
        #   Beginning header is 104 bytes, 963 signed (sometimes 739).
        #   Termination header is 148 bytes, 370 signed.
        #   NOOP header is 48 bytes, 391 signed.

        # Through observation, it appears that the file payload accounts for
        # about 2/3rds of the size of the signed transaction.  In other words,
        # when a transaction is carrying 2236 bytes (via 5 outputs), the signed
        # transaction comes to about 3334 bytes (which is about 67%
        # efficiency).  This ratio appears stable even for larger payloads;
        # when transactions carry 4476 bytes (via 10 outputs), the signed
        # transaction size is around 6657 bytes (also about 67% efficient).
        # Hence the overhead multiplier to convert the file size bytes to
        # signed transaction bytes is around 1.5.
        #
        # Also, we will add in the signed message sizes of the beginning header
        # and terminating header.  These were seen to be 963 and 370,
        # respectively, though we will round them up to 1024 and 512.
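        #
        # Sanity-checking that multiplier: 2236 payload bytes * 1.5 = 3354,
        # within about 1% of the observed 3334-byte signed transaction, and
        # 4476 * 1.5 = 6714 vs. the observed 6657.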
        tx_bytes = math.ceil(nbytes * 1.5) + 1024 + 512

        # When publishing with multiple transactions, NOOP messages are sent to
        # split the header message into multiple generations.  These NOOPs were
        # observed to be 391 bytes after signing, and we round them up to 512
        # here.  Since this occurs once at the start of publication, and once
        # at the end, this is multiplied by 2.
        if num_concurrent_transactions > 1:
            tx_bytes = tx_bytes + ((num_concurrent_transactions * 512) * 2)

        # Multiply the kilobytes of signed data with the per-KB transaction
        # fee rate.
        transaction_fees = (tx_bytes / 1024) * estimate_with_fee

        # Calculate the transaction fees for Dogecoin differently... because
        # reasons.
        if chain == Publication.BLOCKCHAIN_DOGE:
            # Estimate the final size of each transaction (with sigs included).
            tx_size = (num_outputs * Publication.SINGLE_OUTPUT_SIZE) * 1.5

            # Estimate the fee needed per each transaction.
            fee_per_tx = math.ceil(tx_size / 1024)

            transaction_fees = total_num_transactions * fee_per_tx

        # The estimated cost is the transaction fees, plus the amounts we are
        # sending back and forth.  That is the dust threshold, times the number
        # of outputs per transaction, times the number of concurrent
        # transactions.  This amount is refundable at the end of publication.
        refundable_amount = (num_concurrent_transactions * num_outputs * Publication.DUST_THRESHOLD)
        publication_cost = transaction_fees + refundable_amount

        # The 1.5 multiplier is more accurate for larger file publications, and
        # not so accurate for smaller ones.  So we will scale up the estimate
        # based on file size.
        multiplier = 1.0

        # Smaller than 10KB: 25% increase.
        if nbytes < (1024 * 10):
            multiplier = 1.25

        # Smaller than 100KB: 20% increase.
        elif nbytes < (1024 * 100):
            multiplier = 1.20

        # Smaller than 500KB: 15% increase.
        elif nbytes < (1024 * 500):
            multiplier = 1.15

        # Larger than 500KB: 10% increase.
        else:
            multiplier = 1.10

        publication_cost = publication_cost * multiplier

        # Fees in dogecoin should all be rounded up.
        if chain == Publication.BLOCKCHAIN_DOGE:
            publication_cost = int(math.ceil(publication_cost))

        time = Publication.get_time_estimate(num_block_generations, chain)
        return publication_cost, transaction_fees, refundable_amount, multiplier, time, num_block_generations, size, estimate_with_fee
Example #10
def ref49615():
    """ Belle', 2003, 'P. Krokovny et al """
    return Publication(49615, [
        Measurement('M172', 'M172M', '123.8 x 10^6 BBbar events'),
        Measurement('M172', 'M172DM', '123.8 x 10^6 BBbar events')
    ])
Example #11
 def __init__(self, title, publisher, status, created_by, category, type,
              frequency):
     Publication.__init__(self, title, publisher, status, created_by,
                          category, type)
     self.__frequency = frequency