def book_from_xml(xml):
    authors = re.findall('<author>(.+?)</author>', xml)

    # title
    title = re.search('<title>(.+?)</title>', xml)
    title = title.group(1) if title is not None else ""

    # year
    year = re.search('<year>(.+?)</year>', xml)
    year = year.group(1) if year is not None else ""

    # link
    link = re.search('<ee>(.+?)</ee>', xml)
    link = link.group(1) if link is not None else ""

    book = Publication(title, authors, year, link)

    # Contributor (journal); it may be absent.
    publisher = re.search('<journal>(.+?)</journal>', xml)
    if publisher is not None:
        book.add_contributor(publisher.group(1))

    book.print_info()
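# --- Usage sketch for book_from_xml. Assumes `re` is imported at module
# --- level and that Publication provides the constructor,
# --- add_contributor(), and print_info() used above. The sample record is
# --- hypothetical, in the style of a dblp XML entry.
if __name__ == '__main__':
    sample = ('<article><author>A. Author</author><author>B. Writer</author>'
              '<title>An Example Title</title><year>2003</year>'
              '<ee>http://example.org/paper</ee>'
              '<journal>Example Journal</journal></article>')
    book_from_xml(sample)  # prints title, authors, year, link, and journal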
def __str__(self):
    import binascii  # used below; the original 'import base64' was unused
    from Publication import Publication

    general_flags_str = 'General flags: '
    if self.is_deadman_switch_file():
        general_flags_str += 'Deadman Switch File'
    elif self.is_deadman_switch_key():
        general_flags_str += 'Deadman Switch Key'
    else:
        general_flags_str += 'None'

    s = ''
    if self.temporal_key is not None:
        s = "Temporal Key: %s\n" % binascii.hexlify(
            self.temporal_key).decode('ascii')

    return ("PartialFile:\n\tInitial TXID: %s\n\tSanitized filename: %s\n\t"
            "Description: %s\n\tFile size: %d\n\tEncryption type: %s\n\t"
            "Content type: %s\n\tCompression type: %s\n\t%s\n\t"
            "File hash: %s\n\tFile pointer: %d\n\tACK Window: %s\n\t%s\n\t"
            "Initial block number: %d\n\tFinal block number: %d\n\t"
            "SQL ID: %d\n\tIs deadman switch file: %s\n\t"
            "Is deadman switch key: %s\n\t"
            "Is complete deadman switch file: %r\n\tIs complete: %r\n" % (
                self.initial_txid, self.sanitized_filename,
                self.description, self.file_size,
                Publication.get_encryption_str(self.encryption_type),
                Publication.get_content_type_str(self.content_type),
                Publication.get_compression_type_str(self.compression_type),
                general_flags_str,
                binascii.hexlify(self.file_hash).decode('ascii'),
                self.file_ptr, self.block_acks, s, self.initial_block_num,
                self.final_block_num, self.sql_id,
                self.is_deadman_switch_file(), self.is_deadman_switch_key(),
                self.is_complete_deadman_switch_file(), self.is_complete()))
def __init__(self, title, publisher, status, created_by, category, type,
             synopsis, author, isbnno):
    Publication.__init__(self, title, publisher, status, created_by,
                         category, type)
    self.__synopsis = synopsis
    self.__author = author
    self.__isbnno = isbnno
def search_single_pub(self, paper_title: str) -> Publication:
    """Search by scholar query and return a single Publication object"""
    url = self.URLS('PUBSEARCH').format(requests.utils.quote(paper_title))
    soup = self._get_soup(self.URLS('HOST').format(url))
    self.__URLS['PUBLIB'] = soup.find('div', id='gs_res_glb').get('data-sva')
    return Publication(soup.find_all('div', 'gs_or')[0], self, 'scholar')
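# --- Usage sketch for search_single_pub (the `querier` instance below is
# --- hypothetical; any instance of the surrounding scraper class works):
#
#     pub = querier.search_single_pub("an example paper title")
#
# --- This fetches the PUBSEARCH results page, caches the page's 'data-sva'
# --- value under the PUBLIB key, and wraps the first 'gs_or' result row in
# --- a Publication.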
def getDataFromEntry(f, line):
    firstBlock, line = getBlock(f, line)
    if 'RETRACTED ARTICLE' in firstBlock:
        return None

    parsedDateVector = re.findall(
        r'((19|20)[0-9]{2}\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))',
        firstBlock)
    if len(parsedDateVector) == 0:
        parsedDateVector = re.findall(r'((19|20)[0-9]{2})', firstBlock)

    if len(parsedDateVector) == 0:
        title, abstract, date, authors, pmid = getDataFromNonJournalEntry(
            f, line, firstBlock)
    else:
        title, abstract, date, authors, pmid = getDataFromJournalEntry(
            f, line, firstBlock)

    cleanedAuthors = re.sub('[0-9()]', '', authors).split(', ')

    parsedDateVector = re.findall(
        r'((19|20)[0-9]{2}\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))',
        date)
    if len(parsedDateVector) == 0:
        parsedDateVector = re.findall(r'((19|20)[0-9]{2})', date)
    if len(parsedDateVector) == 0:
        print('Error: No date found for paper', date)
        parsedDate = None
    else:
        parsedDate = parsedDateVector[0][0]

    return Publication(title, abstract, parsedDate, cleanedAuthors, pmid)
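# --- Minimal illustration of why parsedDate is taken as
# --- parsedDateVector[0][0]: with nested groups, re.findall returns one
# --- tuple per match whose first element is the outermost group.
# --- Standalone sketch; only the standard-library `re` module is needed.
import re

matches = re.findall(
    r'((19|20)[0-9]{2}\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))',
    '2003 May; 14(5):123-9')
# matches == [('2003 May', '20', 'May')], so matches[0][0] == '2003 May'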
def _search_scholar_soup(self, soup):
    """Generator that yields Publication objects from the search page"""
    while True:
        for row in soup.find_all('div', 'gs_or'):
            yield Publication(row, self, 'scholar')
        next_link = soup.find(class_='gs_ico gs_ico_nav_next')
        if next_link:
            url = next_link.parent['href']
            soup = self._get_soup(self.URLS('HOST').format(url))
        else:
            break
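# --- Usage sketch: since _search_scholar_soup follows "next" links for as
# --- long as they exist, callers typically bound it, e.g. with
# --- itertools.islice. The `searcher` and `first_soup` objects here are
# --- hypothetical stand-ins for an instance of the surrounding class and
# --- an already-fetched results page.
from itertools import islice

first_five = list(islice(searcher._search_scholar_soup(first_soup), 5))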
def get_book_from_block(self, block):
    title = re.search('<h3>.*?<a.*?>(.*?)</a>', block)
    title = title.group(1) if title is not None else ""

    authors = re.findall('class="author-name-tooltip".*?>(.*?)</a>', block)
    for i, auth in enumerate(authors):
        authors[i] = re.sub('<.*?>', '', auth)

    year = re.search(r'class="conference">.*?<span.*? (\d{4}).*?</span>',
                     block)
    year = year.group(1) if year is not None else ""

    link = re.search('<h3>.*?<a.*?href="(.*?)">', block)
    link = (self.base_href + link.group(1)) if link is not None else ""

    book = Publication(title, authors, year, link)

    desc = re.search('class="abstract">.*?<span.*?>(.*?)</span>.*?'
                     'class="conference">.*?<span', block)
    if desc is not None:
        book.add_description(desc.group(1))

    publisher = re.search('class="conference">.*?</span><a.*?>(.*?)</a>',
                          block)
    if publisher is not None:
        book.add_publisher(publisher.group(1))

    book.print_info()
def finalize(self, temporal_key, block_num):
    from Publication import Publication
    from Utils import Utils

    # If not all bytes were received, this is a failure.
    if (not self.is_complete()) and \
            (not self.is_complete_deadman_switch_file()):
        self.d("Cannot finalize because file is not complete!")
        return False

    # Update the temporal key, if there is one.
    if (self.encryption_type != Publication.ENCRYPTION_TYPE_NONE) and \
            (temporal_key != (b'\x00' * 32)):
        self.temporal_key = temporal_key

    # If file is in plaintext, the hash is in the temporal key field.
    if self.encryption_type == Publication.ENCRYPTION_TYPE_NONE:
        self.file_hash = temporal_key
        self.temporal_key = b'\x00' * 32

    # Read the file we extracted.
    file_bytes = None
    with open(self.file_path, 'rb') as f:
        file_bytes = f.read()

    # Calculate the hash of the file we extracted.
    calculated_hash = hashlib.sha256(file_bytes).digest()

    # Check that the hash in the publication header matches what we have.
    if self.file_hash != calculated_hash:
        self.d("Hashes do not match!:\n%s\n%s" %
               (binascii.hexlify(self.file_hash).decode('ascii'),
                binascii.hexlify(calculated_hash).decode('ascii')))
        return False

    # If this file is a deadman switch, don't try to decrypt, since we
    # don't have the real key here.
    if self.is_deadman_switch_file() and (temporal_key == (b'\xff' * 32)):
        # Save the num_parallel_txs and encryption_type so that when the
        # key is found in the future, we know how to decrypt this.
        self.final_block_num = block_num
        self.save_state()
        return True

    # Get a unique filename in the output directory.
    new_file_path = PartialFile.get_unique_filepath(
        self.initial_txid, self.output_dir, self.sanitized_filename)

    # Decrypt the file, if necessary.
    if self.encryption_type == Publication.ENCRYPTION_TYPE_GPG2_AES256_SHA512:
        self.d("File is encrypted with type %s. Decrypting..." %
               Publication.get_encryption_str(self.encryption_type))
        file_bytes = Utils.decrypt(file_bytes, self.temporal_key)
        if len(file_bytes) == 0:
            self.d("Decryption of file yielded zero bytes!")
            return False

        # Write the plaintext bytes into the output directory.
        with open(new_file_path, 'wb') as f:
            f.write(file_bytes)

        # Remove the encrypted file.
        try:
            os.unlink(self.file_path)
        except FileNotFoundError:
            pass
    else:
        # Move file out of partial directory into output directory.
        os.rename(self.file_path, new_file_path)

    # Update the file_path with its final destination.
    self.file_path = new_file_path

    # Delete the state file.
    try:
        os.unlink(self.state_file)
    except FileNotFoundError:
        pass

    # Update the final block number.
    self.final_block_num = block_num

    # Mark as finalized and return success.
    self.finalized = True
    return True
def get_estimate(rpc_client, filepath, chain, num_outputs,
                 num_concurrent_transactions, estimate_with_fee):
    from Publication import Publication

    cost = 0.0
    time = None
    ntransactions = 0
    size = None

    # No fee rate was given, so try to get it from the network.
    if (estimate_with_fee is None) or (estimate_with_fee < 0.0):
        print("Getting fee estimate from network...")
        estimate_with_fee = rpc_client.estimatefee(1)
        if estimate_with_fee <= 0.0:
            print("Error: could not get fee estimate from network. "
                  "Specify fee manually with --txfee argument.")
            sys.exit(-1)
        else:
            print("Found fee estimate: %f" % estimate_with_fee)

    nbytes = os.stat(filepath).st_size
    if nbytes > 1073741824:
        size = "%s GB" % format(nbytes / 1073741824, '2.1f')
    elif nbytes > 1048576:
        size = "%s MB" % format(nbytes / 1048576, '2.1f')
    elif nbytes > 1024:
        size = "%s KB" % format(nbytes / 1024, '2.1f')
    else:
        size = "%d bytes" % nbytes

    total_num_transactions = math.ceil(
        nbytes / (num_outputs * Publication.SINGLE_OUTPUT_SIZE))
    num_block_generations = math.ceil(
        total_num_transactions / num_concurrent_transactions)

    # Another three blocks/transactions are needed for the header,
    # termination, and change transactions.
    num_block_generations += 3
    total_num_transactions += 3

    # For multi-transaction publications, there's a NOOP transaction at the
    # beginning and at the end.
    if num_concurrent_transactions > 1:
        num_block_generations += 2
        total_num_transactions += 2

    # Notes from observation:
    #   Beginning header is 104 bytes, 963 signed (sometimes 739).
    #   Termination header is 148 bytes, 370 signed.
    #   NOOP header is 48 bytes, 391 signed.

    # Through observation, it appears that the file payload accounts for
    # about two-thirds of the size of the signed transaction. In other
    # words, when a transaction is carrying 2236 bytes (via 5 outputs), the
    # signed transaction comes to about 3334 bytes (which is about 67%
    # efficiency). This ratio appears stable even for larger payloads;
    # when transactions carry 4476 bytes (via 10 outputs), the signed
    # transaction size is around 6657 bytes (also about 67% efficient).
    # Hence the overhead multiplier to convert the file size bytes to
    # signed transaction bytes is around 1.5.
    #
    # Also, we will add in the signed message sizes of the beginning header
    # and terminating header. These were seen to be 963 and 370,
    # respectively, though we will round them up to 1024 and 512.
    tx_bytes = math.ceil(nbytes * 1.5) + 1024 + 512

    # When publishing with multiple transactions, NOOP messages are sent to
    # split the header message into multiple generations. These NOOPs were
    # observed to be 391 bytes after signing, and we round them up to 512
    # here. Since this occurs once at the start of publication, and once
    # at the end, this is multiplied by 2.
    if num_concurrent_transactions > 1:
        tx_bytes = tx_bytes + ((num_concurrent_transactions * 512) * 2)

    # Multiply the kilobytes of signed data by the per-KB transaction
    # fee rate.
    transaction_fees = (tx_bytes / 1024) * estimate_with_fee

    # Calculate the transaction fees for Dogecoin differently... because
    # reasons.
    if chain == Publication.BLOCKCHAIN_DOGE:
        # Estimate the final size of each transaction (with sigs included).
        tx_size = (num_outputs * Publication.SINGLE_OUTPUT_SIZE) * 1.5

        # Estimate the fee needed per each transaction.
        fee_per_tx = math.ceil(tx_size / 1024)
        transaction_fees = total_num_transactions * fee_per_tx

    # The estimated cost is the transaction fees, plus the amounts we are
    # sending back and forth. That is the dust threshold, times the number
    # of outputs per transaction, times the number of concurrent
    # transactions. This amount is refundable at the end of publication.
    refundable_amount = (num_concurrent_transactions * num_outputs *
                         Publication.DUST_THRESHOLD)
    publication_cost = transaction_fees + refundable_amount

    # The 1.5 multiplier is more accurate for larger file publications, and
    # not so accurate for smaller ones. So we will scale up the estimate
    # based on file size.
    multiplier = 1.0

    # Smaller than 10KB: 25% increase.
    if nbytes < (1024 * 10):
        multiplier = 1.25
    # Smaller than 100KB: 20% increase.
    elif nbytes < (1024 * 100):
        multiplier = 1.20
    # Smaller than 500KB: 15% increase.
    elif nbytes < (1024 * 500):
        multiplier = 1.15
    # Larger than 500KB: 10% increase.
    else:
        multiplier = 1.10

    publication_cost = publication_cost * multiplier

    # Fees in Dogecoin should all be rounded up.
    if chain == Publication.BLOCKCHAIN_DOGE:
        publication_cost = int(math.ceil(publication_cost))

    time = Publication.get_time_estimate(num_block_generations, chain)

    return (publication_cost, transaction_fees, refundable_amount,
            multiplier, time, num_block_generations, size,
            estimate_with_fee)
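# --- Worked example of the estimate arithmetic above, assuming
# --- (hypothetically) that Publication.SINGLE_OUTPUT_SIZE is 447 bytes
# --- (the observation notes show 5 outputs carrying 2236 bytes, i.e.
# --- roughly 447 bytes per output). For a 100 KB file with num_outputs=5
# --- and num_concurrent_transactions=5:
import math

nbytes, outputs, concurrent, output_size = 102400, 5, 5, 447
total_txs = math.ceil(nbytes / (outputs * output_size))           # 46
generations = math.ceil(total_txs / concurrent)                   # 10
total_txs, generations = total_txs + 3 + 2, generations + 3 + 2   # 51, 15
tx_bytes = math.ceil(nbytes * 1.5) + 1024 + 512 + (concurrent * 512 * 2)
# tx_bytes == 160256; the fee then scales as (tx_bytes / 1024) * fee_rate.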
def ref49615():
    """Belle, 2003, P. Krokovny et al."""
    return Publication(49615, [
        Measurement('M172', 'M172M', '123.8 x 10^6 BBbar events'),
        Measurement('M172', 'M172DM', '123.8 x 10^6 BBbar events')
    ])
def __init__(self, title, publisher, status, created_by, category, type,
             frequency):
    Publication.__init__(self, title, publisher, status, created_by,
                         category, type)
    self.__frequency = frequency