def latex(self, doc):
    with doc.create(Subsection(self.Tempering + self.name)):
        self.latexMonsterBlock(doc)
        self.latexStatus(doc)
        with doc.create(Subsubsection("Breakable Parts")):
            for part, name in zip(self.Parts, self.PartNames['PartStringIds']):
                doc.append(
                    self.latexPart(
                        part,
                        name.replace("LOC_PART_", "").replace("_", " ").title()))
        if self.Sever:
            if ('RemovablePartStringIds' not in self.PartNames
                    or len(self.PartNames['RemovablePartStringIds']) != len(self.Sever)):
                self.PartNames['RemovablePartStringIds'] = [
                    "Unk%d" % ix for ix in range(len(self.Sever))
                ]
            with doc.create(Subsubsection("Severable Parts")):
                for part, name in zip(self.Sever,
                                      self.PartNames['RemovablePartStringIds']):
                    doc.append(
                        self.latexPart(
                            part,
                            name.replace("LOC_REMOVABLE_PART_", "").replace("_", " ").title()))
def generate_user_stories(schema: PLDSchema, locale: LocaleDictionary,
                          document: Document) -> Document:
    document.append(NewPage())
    with document.create(Section(title=locale.user_stories)) as section:
        section: Section
        for deliverable in schema.deliverables:
            with section.create(Subsection(title=deliverable.name)) as subsection:
                subsection: Subsection
                if deliverable.description is not None:
                    subsection.append(MediumText(data=deliverable.description))
                for subset in deliverable.subsets:
                    with subsection.create(Subsubsection(title=subset.name)) as subsubsection:
                        subsubsection: Subsubsection
                        if subset.description is not None:
                            subsubsection.append(MediumText(data=subset.description))
                        for user_story in subset.user_stories:
                            with subsubsection.create(
                                    Paragraph(title=user_story.name)) as paragraph:
                                paragraph: Paragraph
                                paragraph.append(Command("mbox", ""))
                                paragraph.append(NoEscape("\\\\\n"))
                                generate_user_story(user_story, locale, paragraph)
    return document
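# Note on the Command("mbox", "") / NoEscape("\\\\\n") pair used above: it forces
# a line break right after the run-in \paragraph title so the user-story body
# starts on its own line. A minimal standalone sketch of the same trick (the
# document setup and strings here are illustrative, not from the original source):
from pylatex import Command, Document, NoEscape
from pylatex.section import Paragraph

doc = Document()
with doc.create(Paragraph(title='US-1: As a user, I want to log in')) as par:
    par.append(Command('mbox', ''))   # empty box for the break to attach to
    par.append(NoEscape('\\\\\n'))    # line break after the paragraph title
    par.append('Acceptance criteria go here.')
doc.generate_tex('user_story_demo')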
def generateLatex(self):
    geometry_options = {"tmargin": "1cm", "lmargin": "3cm", "bmargin": "2cm"}
    doc = Document(geometry_options=geometry_options)
    doc.packages.append(Package("float"))
    for chapter in self.chapters:
        with doc.create(Section(str(chapter.name))):
            # Chapter intro
            if chapter.introLines:
                for line in chapter.introLines:
                    if line.lineType == "normal":
                        doc.append(line.content)
            # Subchapters
            for subChapter in chapter.subChapters:
                with doc.create(Subsection(subChapter.name)):
                    if subChapter.lines:
                        for line in subChapter.lines:
                            doc = self.createParagraph(doc, line)
                    if subChapter.sschapters:
                        for sschapter in subChapter.sschapters:
                            with doc.create(Subsubsection(sschapter.name)):
                                if sschapter.lines:
                                    for line in sschapter.lines:
                                        doc = self.createParagraph(doc, line)
    doc.generate_pdf(self.filePath + self.filename, clean_tex=False)
def add_shopping_list(doc: Document, camp: Camp):
    # content for this page
    doc.append(Section('Einkaufsliste', numbering=False))

    # space between columns
    doc.append(Command('setlength', arguments=Command('columnsep'),
                       extra_arguments='40pt'))

    doc.packages.add(Package('multicol'))
    doc.packages.add(Package('enumitem'))
    doc.packages.add(Package('setspace'))

    for _ in range(1):
        doc.append(Subsubsection('Gemüse und Früchte', numbering=False))
        with doc.create(Multicols(arguments='2')) as multicols:
            multicols.append(Command('small'))
            with multicols.create(
                    Description(options='leftmargin=1.75cm, itemsep=4pt')) as itemize:
                # no extra space between items
                itemize.append(Command('setlength', arguments=Command('itemsep'),
                                       extra_arguments='0pt'))
                itemize.append(Command('setlength', arguments=Command('parskip'),
                                       extra_arguments='0pt'))
                itemize.add_item('100g', 'the first item')
                itemize.add_item('23 Stk.', 'Bananen')
                itemize.add_item('100g', 'the first item')
                itemize.add_item('10g', 'the item')
def append_normal_stress(self, doc, tpdf):
    with doc.create(Section(tpdf.step_normal_stress)):
        with doc.create(Subsection(tpdf.normal_stress_formula)):
            self.pdf.add_equation(
                tpdf.normal_stress_var +
                r' \frac{N}{A} - \frac{My}{Iy} \cdot z - \frac{Mz}{Iz} \cdot y')
        for i in range(len(self.mng.normal_stress_data_list)):
            with doc.create(
                    Subsection(
                        f'{tpdf.calculating_for} N = {self.mng.normal_stress_data_list[i].normal_force} N, '
                        f'My = {append_result(self.mng.normal_stress_data_list[i].moment_y)} Nm, '
                        f'Mz = {append_result(self.mng.normal_stress_data_list[i].moment_x)} Nm, '
                        f'y = {self.mng.normal_stress_data_list[i].y} m, '
                        f'z = {self.mng.normal_stress_data_list[i].z} m')):
                with doc.create(Subsubsection(tpdf.step_normal_stress)):
                    self.pdf.add_equation(
                        tpdf.normal_stress_var +
                        f'{append_step(self.mng.normal_stress_data_list[i].normal_stress)}')
                    self.pdf.add_equation(
                        tpdf.normal_stress_var +
                        f'{append_result(self.mng.normal_stress_data_list[i].normal_stress)}$ $Pa')
def print_ensemble_models(self, row):
    # Create a subsubsection to easily indent. The subsubsection title is the
    # ensemble name; its body is a table of the ensemble models (layer 0).
    ensemble_method = row[0]  # name of the ensemble method
    with self.doc.create(
            Subsubsection(self.get_model_name(ensemble_method), numbering=False)):
        # create a table for the ensemble models
        self.doc.append(NoEscape(r'\leftskip=40pt'))  # indentation
        self.doc.append('Score: ' + str(row[1]) + '\n\n')
        table = Tabular('|c|c|c|l|')
        table.add_hline()
        # add the table header
        table.add_row(('Learner', 'Score', 'Parameters', 'weight'))
        table.add_hline()
        # for each model in the ensemble, add a row to the table
        for k in row[3]:
            cur_model = self.experiments.loc[k]
            data = [
                cur_model[0], cur_model[1],
                self.format_dict(cur_model[3]), row[3][k]
            ]
            table.add_row(data)
            table.add_hline()
        self.doc.append(table)
def __get_sub_section_form_depth(self):
    if self.depth == 0:
        return Section(self.name)
    elif self.depth == 1:
        return Subsection(self.name)
    elif self.depth == 2:
        return Subsubsection(self.name)
def print_ensemble(self, row):
    d = row[3]  # dictionary of dictionaries (of ensembles)
    cnt = 1
    with self.doc.create(Subsubsection('Ensemble Selection', numbering=False)):
        self.doc.append(NoEscape(r'\leftskip=40pt'))  # indentation
        self.doc.append('Score: ' + str(row[1]) + '\n\n')
        for sub_d in d:  # for every ensemble, print it
            self.doc.append('Bag: ' + str(cnt) + '\n\n')
            cnt += 1
            table = Tabular('|c|c|c|l|')
            table.add_hline()
            # add the table header
            table.add_row(('Learner', 'Score', 'Parameters', 'weight'))
            table.add_hline()
            for k in sub_d:
                cur_model = self.experiments.loc[k]
                data = [
                    cur_model[0], round(cur_model[1], 4),
                    self.format_dict(cur_model[3]), sub_d[k]
                ]
                table.add_row(data)
                table.add_hline()
            self.doc.append(table)
            self.doc.append('\n\n\n\n')
def _document_fe_graph(self) -> None:
    """Add FE execution graphs into the traceability document.
    """
    with self.doc.create(Section("FastEstimator Architecture")):
        for mode in self.system.pipeline.data.keys():
            scheduled_items = self.system.pipeline.get_scheduled_items(
                mode) + self.system.network.get_scheduled_items(mode) + self.system.traces
            signature_epochs = get_signature_epochs(
                scheduled_items, total_epochs=self.system.epoch_idx, mode=mode)
            epochs_with_data = self.system.pipeline.get_epochs_with_data(
                total_epochs=self.system.epoch_idx, mode=mode)
            if set(signature_epochs) & epochs_with_data:
                self.doc.append(NoEscape(r'\FloatBarrier'))
                with self.doc.create(Subsection(mode.capitalize())):
                    for epoch in signature_epochs:
                        if epoch not in epochs_with_data:
                            continue
                        self.doc.append(NoEscape(r'\FloatBarrier'))
                        with self.doc.create(
                                Subsubsection(f"Epoch {epoch}",
                                              label=Label(Marker(name=f"{mode}{epoch}",
                                                                 prefix="ssubsec")))):
                            diagram = self._draw_diagram(mode, epoch)
                            ltx = d2t.dot2tex(diagram.to_string(), figonly=True)
                            args = Arguments(
                                **{'max width': r'\textwidth, max height=0.9\textheight'})
                            args.escape = False
                            with self.doc.create(Center()):
                                with self.doc.create(AdjustBox(arguments=args)) as box:
                                    box.append(NoEscape(ltx))
def start():
    info = get_json(f"../{defaults.get('jsonOutput')}")
    doc = Document(default_filepath=defaults.get('pdfPath'))
    for artist, artistListStuff in info.items():
        with doc.create(Section(f'Artist: {artist}')):
            importLinedFigures(doc, artistListStuff['base_objects'])
        for subsection, subsectionList in artistListStuff['sub_objects'].items():
            with doc.create(Subsection(f'Folder: {subsection}')):
                importLinedFigures(doc, subsectionList)
            doc.append(NoEscape(r'\clearpage'))
        for subsubsection, subsubsectionList in artistListStuff['sub_sub_objects'].items():
            with doc.create(Subsubsection(f'Sub Folder: {subsubsection}')):
                importLinedFigures(doc, subsubsectionList)
            doc.append(NoEscape(r'\clearpage'))
    try:
        doc.generate_tex()
        doc.generate_pdf(clean_tex=False)
        print('done?')
    except Exception as e:
        print("OH NO")
        print(e)
def _get_section(**content):
    if content['level'] == 1:
        return Section(content['title'])
    elif content['level'] == 2:
        return Subsection(content['title'])
    elif content['level'] == 3:
        return Subsubsection(content['title'])
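# A minimal usage sketch for the level-dispatch helper above; the outline data
# and document setup are hypothetical, not part of the original source. Note
# that the helper silently returns None for levels deeper than 3:
from pylatex import Document, Section, Subsection, Subsubsection

outline = [
    {'level': 1, 'title': 'Introduction'},
    {'level': 2, 'title': 'Background'},
    {'level': 3, 'title': 'Prior Work'},
]

doc = Document()
for entry in outline:
    doc.append(_get_section(**entry))  # levels 1-3 map to (Sub(sub))section
doc.generate_tex('outline_demo')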
def format_latex(title, soup):
    # create document
    doc = Document()
    # set preamble
    doc.preamble.append(Command('title', title))
    doc.append(NoEscape(r'\maketitle'))
    # get the main content body
    main_content = soup.body.find('div').find('div')
    elements = main_content.find_all(True)
    # iterate over elements
    for ele in elements:
        if ele.name == 'h1':
            doc.append(Section(ele.text))
        elif ele.name == 'h2':
            doc.append(Subsection(ele.text))
        elif ele.name == 'h3':
            doc.append(Subsubsection(ele.text))
        elif ele.name == 'h4':
            doc.append(Paragraph(ele.text))
        elif ele.name == 'h5':
            doc.append(Subparagraph(ele.text))
        elif ele.name == 'p':
            doc.append(ele.text + '\n')
        elif ele.name == 'ul':
            with doc.create(Itemize()) as item:
                for li in ele.find_all('li'):
                    item.add_item(li.text)
        elif ele.name == 'ol':
            with doc.create(Enumerate()) as enum:
                for li in ele.find_all('li'):
                    enum.add_item(li.text)
        elif ele.name == 'img':
            with doc.create(Figure(position='h!')) as fig:
                # create tmp directory for images
                pathlib.Path('build/images').mkdir(parents=True, exist_ok=True)
                # check if source is using // shorthand for http://
                src = ele['src']
                if src.startswith('//'):
                    src = 'http:' + src
                # generate image path
                image_path = 'images/' + src.split('/')[-1]
                # retrieve image
                print('downloading image ' + src)
                headers = {'User-Agent': USER_AGENT}
                response = requests.get(src, stream=True, headers=headers)
                with open('build/' + image_path, 'wb') as f:
                    response.raw.decode_content = True
                    shutil.copyfileobj(response.raw, f)
                # append image
                fig.add_image(image_path)
    return doc
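# A hypothetical driver for format_latex above, assuming BeautifulSoup and a
# module-level USER_AGENT constant (both implied by the function body but not
# shown in the excerpt); the URL is a placeholder:
import requests
from bs4 import BeautifulSoup

USER_AGENT = 'Mozilla/5.0 (compatible; latex-export)'

html = requests.get('https://example.com/article',
                    headers={'User-Agent': USER_AGENT}).text
soup = BeautifulSoup(html, 'html.parser')
doc = format_latex('Example Article', soup)
doc.generate_pdf('build/article', clean_tex=False)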
def create_sub_sub_section(io_doc, i_specification_line, io_sections_dict,
                           io_characteristic_bloc):
    if i_specification_line.sub_sub_section not in io_sections_dict[
            i_specification_line.section][i_specification_line.sub_section]:
        with io_doc.create(Subsubsection(i_specification_line.sub_sub_section)):
            fill_paragraph(io_doc, i_specification_line, io_sections_dict,
                           io_characteristic_bloc)
    else:
        fill_paragraph(io_doc, i_specification_line, io_sections_dict,
                       io_characteristic_bloc)
def _trousseau2allfields(self, T, sort_it=True):
    """
    Generates latex code for the verbose of a list of ccsds keys

    Args:
      * T (trousseau): the trousseau to generate code on
      * sort_it (bool): whether to re-order the keys using their start values
    """
    if T is None or getattr(T, 'size', 0) == 0:
        return []
    unit = "octet" if T.octets else "bit"
    if sort_it:
        Tkeys = sorted(T.keys, key=lambda x: x.start)
    else:
        Tkeys = T.keys
    res = []
    start_bit = 0
    for cle in Tkeys:
        clesection = Subsubsection(cle.name)
        items = []
        if not cle.relative_only:
            start_bit = cle.start
        # if no padding, allow a flexible length for the ccsdskey
        the_len = cle.len if cle.pad else "[0..{:d}]".format(cle.len)
        if cle.isdic:
            the_verbose = "N/A"
            if cle.dic_force is not None:
                the_type = repr(cle.dic[cle.dic_force])
            else:
                the_type = repr(cle.dic.values())
        else:
            the_type = re.search(r'type *= *([\S ]+)',
                                 getattr(cle._fctunpack, 'func_doc', ''))
            the_verbose = re.search(r'verbose *= *([\S ]+)',
                                    getattr(cle._fctunpack, 'func_doc', ''))
            if the_type is None:
                the_type = '-'
            else:
                the_type = the_type.group(1)
            if the_verbose is None:
                the_verbose = "N/A"
            else:
                the_verbose = the_verbose.group(1)
        items += [r"{}".format(cle.verbose),
                  r"Start {}: {}, length ({}s): {}, value: {}"
                  .format(unit, start_bit, unit, the_len, the_type),
                  r"Conversion: {}".format(the_verbose)]
        if cle.name != cle.disp:
            items += ["Shortened as '{}'".format(cle.disp)]
        for item in items:
            clesection.append(item)
            clesection.append(NewLine())
        res.append(clesection)
        start_bit += cle.len
    return res
def fromSection(self, s: Section):
    level, content = s.level, s.content
    content = self.fromTokenLine(s.content)
    if s.level == 1:
        return TSection(content, label=False)
    elif level == 2:
        return Subsection(content, label=False)
    elif level == 3:
        return Subsubsection(content, label=False)
    elif level == 4:
        return NoEscape(r"\\\noindent{{\large\textbf{{{}}}}}\\".format(content))
        # TODO: using \paragraph would still need some other settings, fonts included
        # return NoEscape(rf"\paragraph{{\textbf{{{content}}}}}\\")
    elif level == 5:
        return NoEscape(r"\\\noindent{{\textbf{{{}}}}}\\".format(content))
def de_section(s: lines.Section):
    level, content = s.level, s.children
    content = NoEscape("".join([de_token(c) for c in content]))
    if s.level == 1:
        sec = TSection(content, label=False)
    elif level == 2:
        sec = Subsection(content, label=False)
    elif level == 3:
        sec = Subsubsection(content, label=False)
    elif level == 4:
        sec = TParagraph(content, label=False)
    elif level == 5:
        sec = Subparagraph(content, label=False)
    else:
        assert False
    return sec
def draw_top_models(self, n):
    '''
    - draws a table of the top models with their details:
      name, score, parameters.
    - takes the data frame of the models as input.
    - prints the best ensemble models, then the best single models in a table.
    '''
    self.doc.append(NoEscape(r'\leftskip=0pt'))
    with self.doc.create(Section('Top ' + str(n) + ' Models', numbering=False)):
        self.doc.append(NoEscape(r'\leftskip=20pt'))
        single_models_table = Tabular("|c|c|c|")
        single_models_table.add_hline()
        single_models_table.add_row(["learner", "Score", "Parameters"])
        single_models_table.add_hline()
        # if it is an ensemble print it, else append it to the table
        k = 0
        single = 0
        ens = 0
        for model in self.experiments.values:
            if k >= n:
                break
            print('Model---\n', model[0])
            # edit
            if model[0] != "ensembleSelection":
                # self.doc.append(NoEscape(r'\leftskip=20pt'))
                # self.print_ensemble(model)
                # else:
                data = [model[0], model[1], self.format_dict(model[3])]
                single_models_table.add_row(data)
                single_models_table.add_hline()
                single += 1
            k += 1
        if single > 0:
            self.doc.append(NoEscape(r'\leftskip=20pt'))
            with self.doc.create(Subsubsection('Single Models', numbering=False)):
                self.doc.append(NoEscape(r'\leftskip=40pt'))
                self.doc.append(single_models_table)
def add_title_page(doc: Document, camp: Camp):
    # packages used for this page
    doc.packages.append(Package('datetime'))
    doc.packages.append(Package('graphicx'))
    doc.packages.append(Package('xcolor'))

    # create content
    doc.preamble.append(
        Command('title',
                NoEscape(r'\Huge \textbf{' + camp.get_camp_name() +
                         r'} \\ \vspace{1.65cm} \Large \textbf{Handbuch Lagerküche}'
                         r'\\ \vspace{8cm}')))
    doc.preamble.append(
        Command('author', NoEscape(r'\normalsize ' + camp.get_full_author_name())))
    doc.preamble.append(Command('date', NoEscape(r'\normalsize Version vom \today')))

    # add title
    doc.append(Command('clearpage'))
    doc.append(Command('maketitle'))

    # no page numbers
    doc.append(Command('thispagestyle', arguments='empty'))

    doc.append(Command('vfill'))
    doc.append(Command('noindent'))
    doc.append(NoEscape('{'))
    doc.append(Command('color', arguments='gray'))
    doc.append(Subsubsection('Haftungsausschluss', numbering=False))
    doc.append(
        SmallText(
            'Dieses Dokument wurde automatisch erstellt. Obwohl uns Qualität und Richtigkeit sehr am '
            'Herzen liegt, können wir Fehler nie ganz ausschliessen. eMeal – Menüplanung haftet nicht '
            'für Schäden, die im Zusammenhang mit diesem Export entstanden sind. Bitte kontrolliere '
            'diesen Export vor dem Lager auf Vollständigkeit.'))
    doc.append(NoEscape('}'))
def append_neutral_line(self, doc, tpdf):
    with doc.create(Section(tpdf.step_neutral_line)):
        with doc.create(Subsection(tpdf.neutral_line_formula)):
            doc.append(NoEscape(tpdf.neutral_line_tip))
            self.pdf.add_equation(
                r'0 = \frac{N}{A} - \frac{My}{Iy} \cdot z - \frac{Mz}{Iz} \cdot y')
        for i in range(len(self.mng.neutral_line_data_list)):
            with doc.create(
                    Subsection(
                        f'{tpdf.calculating_for} N = {self.mng.neutral_line_data_list[i].normal_force} N, '
                        f'My = {append_result(self.mng.neutral_line_data_list[i].moment_y)} Nm, '
                        f'Mz = {append_result(self.mng.neutral_line_data_list[i].moment_x)} Nm, '
                        f'y = {self.mng.neutral_line_data_list[i].y} m, '
                        f'z = {self.mng.neutral_line_data_list[i].z} m')):
                with doc.create(Subsubsection(tpdf.step_neutral_line)):
                    self.pdf.add_equation(
                        f'0 = {append_step(self.mng.neutral_line_data_list[i].normal_stress)}')
                    # the neutral line specifically doesn't need/work with the
                    # append_result function, so round_expr is called directly
                    self.pdf.add_equation(
                        f'{round_expr(self.mng.neutral_line_data_list[i].neutral_line)}')
        addObjPlots(doc, options, objs, pol)
        for o in objs.keys():
            objs[o].clear()
    section = Section('%s' % (presec))  # .replace("_", "\_")
    doc.append(section)
    print("create section: " + presec)
if parts[1] != prevsubsec:
    prevsubsec = parts[1]
    subsection = Subsection('%s' % (prevsubsec))  # .replace("_", "\_")
    section.append(subsection)
    print("create subsection: " + prevsubsec)
if len(parts) > 2:
    subsubsection = Subsubsection('%s' % (parts[2]))  # .replace("_", "\_")
    subsection.append(subsubsection)
    print("create subsubsection: " + parts[2])
else:
    subsubsection = Subsubsection('%s' % (parts[1]))  # .replace("_", "\_")
    subsection.append(subsubsection)
    print("create subsubsection: " + parts[1])

pol = solutions[0][3]
if solutions[0][3] == 'SAT':
    solutions.sort(key=lambda x: (x[3], x[1]))
table = Tabular('l|r|l|r|r|r')
subsubsection.append(table)
table.add_hline()
table.add_row(("Config.", 'Status', "#Sol", 'Time(sec)', 'Build(sec)', 'Nodes'))
                          row['other sectors short-term'], row['unclassified'])
        doc.append(NoEscape(r'}'))

    # 4.1.2
    with doc.create(
            Subsection('By Sector [Public/Private; Financial/Non-Financial]')):
        doc.append(NoEscape(
            r"\href{https://www.trounceflow.com/app/colombia/#tab_edsector}{View the chart }"))
        doc.append('on trounceflow.com and download the data straight from the chart\n')
        doc.append('Recent values are as follows:')
        with doc.create(Subsubsection('Public/Private:', numbering=False)):
            doc.append(bold('COP bn\n'))
            with doc.create(Tabular('l|r|r|r|r')) as table:
                table.add_row('Date', 'Private (long-term)', 'Private (short-term)',
                              'Public (long-term)', 'Public (short-term)')
                table.add_hline()
                for index, row in dfExtDebtBySecPrivCOP.iterrows():
                    table.add_row(row['date'],
                                  row['private sector long-term'],
                                  row['private sector short-term'],
                                  row['public sector long-term'],
                                  row['public sector short-term'])
        with doc.create(Subsubsection('Financial/Non-Financial:', numbering=False)):
            doc.append(bold('COP bn\n'))
        if secao['atividade'] == 'Perfil da Empresa':
            doc.quadro_empresa(json_ppra)
        if secao['atividade'] == 'Quadro de Funcionarios':
            doc.quadro_funcionarios(json_ppra['quadroFuncionarios'],
                                    json_ppra['qtdMasculino'],
                                    json_ppra['qtdFeminino'],
                                    json_ppra['numFunc'])
        if secao['atividade'] == 'Quadro de EPIs':
            doc.quadro_epis(json_ppra['fichaExposicao'])
        if secao['atividade'] == 'Responsavel pelo PPRA':
            doc.quadro_responsavel(json_ppra['profissionais'],
                                   json_ppra['dataEmissao'])
        if secao['atividade'] == 'Assinatura dos Técnicos':
            doc.assinatura_tecnicos(json_ppra['profissionais'])

        for sub_secao in secao['tituloFilho']:
            # generate child title (subsection)
            with doc.create(Subsection(sub_secao['titulo'])):
                doc.append(sub_secao['descricao'])
                if sub_secao['atividade'] == 'Acões':
                    doc.quadro_planejamento(json_ppra['planejamentoAcoes'])
                if sub_secao['atividade'] == 'Ficha de Risco':
                    doc.quadro_fichas(json_ppra['fichaExposicao'])
                for sub_sub_secao in sub_secao['tituloFilho']:
                    # generate grandchild title (subsubsection)
                    with doc.create(Subsubsection(sub_sub_secao['titulo'])):
                        doc.append(sub_sub_secao['descricao'])

    doc.generate_pdf('ppra', clean_tex=False, compiler='pdflatex')
    tex = doc.dumps()  # the document as a string in LaTeX syntax
def fill(self):
    with self.doc.create(
            Section("{} system.".format(self.config['system_name']))):
        self.doc.append(self.system.system_description)
    with self.doc.create(Section("Peripherals.")):
        added_instances = []
        for peri_name, peri_class in sorted(self.system.peripherals.items()):
            if peri_class.name() in added_instances:
                continue
            added_instances.append(peri_class.name())
            with self.doc.create(Section(peri_class.name(), numbering=True)):
                self.doc.append(
                    peri_class.get_kv('peripheral_description').replace('"', ''))
                self.doc.append(NewLine())
                # self.doc.append(MediumText(bold("slave ports.")))
                for val_info, val_type in ((peri_class.registers, 'Registers'),
                                           (peri_class.rams, 'Rams'),
                                           (peri_class.fifos, 'Fifos')):
                    if len(val_info) == 0:
                        continue
                    # self.doc.add(text=val_type, size="medium")
                    added_val_types = []
                    for key, val in sorted(val_info.items()):
                        if val.name() in added_val_types:
                            continue
                        added_val_types.append(val.name())
                        with self.doc.create(
                                Subsection("{} slave.".format(val.name().lower()),
                                           numbering=True)):
                            if val.get_kv('slave_description') is not None:
                                self.doc.append(
                                    val.get_kv('slave_description').replace('"', ''))
                            added_fields = []
                            for field_key, field_val in sorted(val.fields.items()):
                                real_name = field_val.name().strip().split('.')[0]
                                if real_name in added_fields:
                                    continue
                                added_fields.append(real_name)
                                with self.doc.create(
                                        Subsubsection("{} field.".format(real_name),
                                                      numbering=True)):
                                    self.doc.append(
                                        field_val.get_kv('field_description').replace('"', ''))
            table.add_row('Date', 'Banks', 'Government', 'Monetary Authorities', 'Other', 'Total')
            table.add_hline()
            for index, row in dfExtSecUSD.iterrows():
                table.add_row(row['date'], row['banks'], row['central government'],
                              row['monetary authorities'], row['other sectors'], row['Total'])
        doc.append(bold('\n\nARS bn\n'))
        with doc.create(Tabular('l|r|r|r|r|r')) as table:
            table.add_row('Date', 'Banks', 'Government', 'Monetary Authorities', 'Other', 'Total')
            table.add_hline()
            for index, row in dfExtSecARS.iterrows():
                table.add_row(row['date'], row['banks'], row['central government'],
                              row['monetary authorities'], row['other sectors'], row['Total'])

    # Section 4.1.2
    with doc.create(Subsection('By Currency [Domestic, External]')):
        # External Debt by Currency ... Foreign-held Debt
        # 4.1.2.1
        with doc.create(Subsubsection('Foreign holdings of domestic currency bonds',
                                      numbering=False)):
            doc.append(NoEscape(r"\href{https://www.indec.gob.ar/}{View the data }"))
            doc.append('from the primary source (argentina.gob.ar)\n')
            doc.append(NoEscape(
                r"\href{https://www.trounceflow.com/app/argentina/#tab_edcurrency}{View the chart }"))
            doc.append('on trounceflow.com and download the data straight from the chart\n')
            doc.append('Recent values are as follows:\n')
            doc.append(bold('USD bn\n'))
            with doc.create(Tabular('l|r|r|r|r|r')) as table:
                table.add_row('Date', 'ARS', 'USD', 'EUR', 'Other', 'Total')
                table.add_hline()
                for index, row in dfExtCurUSD.iterrows():
                    table.add_row(row['date'], row['local currency_'], row['usd_'],
                                  row['eur_'], row['others_'], row['Total'])
            # doc.append(NewPage())
            doc.append(bold('\n\nARS bn\n'))
            with doc.create(Tabular('l|r|r|r|r|r')) as table:
def s2string(sbmlArgument, file_path=None):
    '''
    Convert sbml to a latex string

    Args:
        sbmlArgument (string): file name of the sbml OR an sbml string
        file_path (string, optional): path for creation of a pdf file,
            only works with latexmk or pdflatex installed

    Returns:
        LaTeX string
    '''
    try:
        import tesbml as libsbml
    except:
        import libsbml
    try:
        from libsbml import (formulaToL3String, writeMathMLToString,
                             parseFormula, readMathMLFromString)
    except:
        from tesbml import (formulaToL3String, writeMathMLToString,
                            parseFormula, readMathMLFromString)
    import math
    import pathlib  # for extracting file extensions
    import os

    def getLaTeXFromAST(tree):
        # xmlstr = writeMathMLToString(tree)
        # # Strip out the header
        # xmlstr = xmlstr.replace('<?xml version="1.0" encoding="UTF-8"?>', '')
        # return mathml2latex_yarosh(xmlstr).strip('$')
        from MATH import convertToInfix
        return convertToInfix(tree)

    # The zeroes are there out of necessity: having a single obj variable does
    # not work, so all classes that are used later are predefined.
    def listfiller(Commands, obj=0, R=0, Sp=0, ass=0, Par=0, tr=0,
                   libsbml=libsbml, tofill=[], twoD=1):
        '''
        Uses a dismal method of evaluating a piece of code from 'Commands'
        to fit a specific string into 'tofill'; takes in a libsbml object as obj.
        If twoD == 0, does not append templin to 'tofill' as one element but
        returns the compiled templin as a 1-D list.
        '''
        l = len(Commands)
        templin = [None] * l
        for i in range(l):
            templin[i] = eval(Commands[i])
        if twoD == 1:
            tofill.append(templin)
            return tofill
        elif twoD == 0:
            return templin

    def round_half_up(n, decimals=0):
        '''
        Use this to round numbers that are way too big to put in a table.
        '''
        multiplier = 10**decimals
        return math.floor(n * multiplier + 0.5) / multiplier

    def lawcutter(prefix):
        '''
        Cuts up the string version of the KineticLaw object into something
        the mathml converter can read.
        '''
        lis = prefix.split('\n')
        i = len(lis) - 1
        if ('  <listOfParameters>' in lis):
            i = lis.index('  <listOfParameters>')
        lis = lis[1:i]
        for n in range(0, len(lis)):
            # here we are messing with indentation; not sure if it will be
            # consistent for all models or even if it is necessary, but it's here
            lis[n] = lis[n][2:]
        newstr = '\n'.join(lis)
        return newstr

    def notecutter(prefix):
        '''
        Same as lawcutter but for notes.
        '''
        prefix = prefix.replace("\n", "")
        lis = prefix.split('>')
        i = len(lis) - 2
        lis = lis[1:i]
        # for n in range(0, len(lis)):
        #     lis[n] = lis[n][1:]
        newstr = '>'.join(lis)
        newstr = newstr + '>'
        return newstr

    # ----------------------------------------------
    # Start of sb2l
    # ----------------------------------------------
    reader = libsbml.SBMLReader()
    # Check if it's a file name
    if os.path.isfile(sbmlArgument):
        suff = pathlib.Path(sbmlArgument).suffix
        if suff == '.xml' or suff == '.sbml':
            sbmldoc = reader.readSBMLFromFile(sbmlArgument)
    else:
        # If it's not a file, assume it's an sbml string
        sbmldoc = reader.readSBMLFromString(sbmlArgument)  # reading in the model
    errors = sbmldoc.getNumErrors()
    numReadErr = 0
    numReadWarn = 0
    for i in range(errors):
        severity = sbmldoc.getError(i).getSeverity()
        if (severity == libsbml.LIBSBML_SEV_ERROR) or (severity == libsbml.LIBSBML_SEV_FATAL):
            seriousErrors = True
            numReadErr += 1
        else:
            numReadWarn += 1
        oss = libsbml.ostringstream()
        sbmldoc.printErrors(oss)
        errMsgRead = oss.str()
        raise RuntimeError(errMsgRead)

    # This is essentially how each list is filled, using commands from LibSBML
    Model_id = sbmldoc.model.getName()
    if len(Model_id) < 1:
        Model_id = sbmldoc.model.getId()
    Model_id = Model_id.replace(r'_', r'\_')

    Compartments = []
    Species = []
    Parameters = []
    Reactions = []
    Events = []
    Rules = []
    FunctionDefinitions = []
    FunctionArgList = []

    # making a notes list
    lis = None
    notes = sbmldoc.model.getNotesString()
    if len(notes) != 0:
        lis = notecutter(notes).split('<')
        lis = lis[2:len(lis)]
    del notes

    l = sbmldoc.model.getNumCompartments()
    if l != 0:
        ComList = ['obj.getId()', 'obj.getSBOTerm()',
                   'obj.getSpatialDimensions()', 'obj.getSize()',
                   'obj.getConstant()']
        for x in range(0, l):
            obj = sbmldoc.model.getCompartment(x)
            # see the function above
            Compartments = listfiller(ComList, obj=obj, tofill=Compartments)
        del (ComList)

    l = sbmldoc.model.getNumSpecies()
    if l != 0:
        SpecList = ['obj.getId()', 'obj.getInitialConcentration()',
                    'obj.getHasOnlySubstanceUnits()', 'obj.getBoundaryCondition()',
                    'obj.getConstant()']
        for x in range(0, l):
            obj = sbmldoc.model.getSpecies(x)
            Species = listfiller(SpecList, obj=obj, tofill=Species)
            if not math.isnan(Species[x][1]):
                # need this to round things so they fit in the table
                if (Species[x][1] * 1000 < 1):
                    Species[x][1] = round_half_up(Species[x][1], decimals=6)
                else:
                    Species[x][1] = round_half_up(Species[x][1], decimals=4)
        del (SpecList)

    l = sbmldoc.model.getNumParameters()
    if l != 0:
        ParList = ['obj.getId()', 'obj.getValue()', 'obj.getConstant()']
        for x in range(0, l):
            obj = sbmldoc.model.getParameter(x)
            Parameters = listfiller(ParList, obj=obj, tofill=Parameters)
        del (ParList)

    l = sbmldoc.model.getNumReactions()
    if l != 0:
        Rlist = ['R.getId()', 'R.getReversible()', 'R.getFast()']
        ReProlist = ['Sp.getSpecies()', 'Sp.getStoichiometry()', 'Sp.getConstant()']
        Modlist = ['obj.getSpecies()']
        for x in range(0, l):
            R = sbmldoc.model.getReaction(x)
            # start the element of the common matrix/list to append at the end
            RL = listfiller(Rlist, R=R, twoD=0)
            # making the list for reactants
            lRe = R.getNumReactants()
            ReL = []
            for y in range(0, lRe):
                Sp = R.getReactant(y)
                ReL = listfiller(ReProlist, Sp=Sp, tofill=ReL)
            RL.append(ReL)  # adding the reactants list to RL
            del (lRe, ReL)
            # making the list for products
            lPro = R.getNumProducts()
            ProL = []
            for y in range(0, lPro):
                Sp = R.getProduct(y)
                ProL = listfiller(ReProlist, Sp=Sp, tofill=ProL)
            RL.append(ProL)  # adding the products list to RL
            del (Sp, ProL, y, lPro)
            # making the law thing
            law = R.getKineticLaw()
            prefix = law.toSBML()
            Formula = lawcutter(prefix)
            # repeating the deleted list for now, so that the code works consistently
            ParList = ['Par.getId()', 'Par.getValue()',
                       'Par.getDerivedUnitDefinition()', 'Par.getConstant()']
            lPar = law.getNumParameters()
            ParL = []
            for y in range(0, lPar):
                Par = law.getParameter(y)
                ParL = listfiller(ParList, Par=Par, tofill=ParL)
            KinLaw = [Formula, ParL]
            RL.append(KinLaw)
            del (Formula, law)
            lMod = R.getNumModifiers()
            if lMod > 0:
                ModL = []
                for y in range(0, lMod):
                    obj = R.getModifier(y)
                    ModL = listfiller(Modlist, obj=obj, tofill=ModL)
                RL.append(ModL)
            # appending all info about a given reaction to the common list
            Reactions.append(RL)
        del (RL, R, Rlist, ReProlist, ParList, lPar, ParL, KinLaw, prefix)

    l = sbmldoc.model.getNumEvents()
    if l != 0:
        TrList = ['tr.getInitialValue()', 'tr.getPersistent()', 'tr.getMath()']
        AsList = ['ass.getId()', 'ass.getMath()']
        for x in range(0, l):
            eve = sbmldoc.model.getEvent(x)  # get the event
            tr = eve.getTrigger()
            TrigL = [0, 0, 0]
            TrigL = listfiller(TrList, tr=tr, tofill=TrigL, twoD=0)  # define trigger things
            m = eve.getNumEventAssignments()
            AssL = []
            for i in range(0, m):
                ass = eve.getEventAssignment(i)
                # add up all of the "ID = Formula" pairs in a single list
                AssL = listfiller(AsList, ass=ass, tofill=AssL)
            del (i, m)
            Events.append([eve.getId(), eve.getName(), TrigL, AssL])
        del (TrList, AsList, eve, tr, TrigL, ass, AssL)

    l = sbmldoc.model.getNumRules()
    if l != 0:
        RuList = ['obj.getVariable()', 'obj.getFormula()']
        for x in range(0, l):
            obj = sbmldoc.model.getRule(x)
            Rules = listfiller(RuList, obj=obj, tofill=Rules)
        del (RuList)
        del (obj)

    l1 = sbmldoc.model.getNumFunctionDefinitions()
    if l1 != 0:
        FuncList = ['obj.getId()', 'obj.getBody()']
        for x in range(0, l1):
            obj = sbmldoc.model.getFunctionDefinition(x)
            FunctionDefinitions = listfiller(FuncList, obj=obj,
                                             tofill=FunctionDefinitions)
            l2 = obj.getNumArguments()
            if l2 != 0:
                for k in range(0, l2):
                    FunctionArgList.append(obj.getArgument(k))

    del (libsbml, lawcutter, l, notecutter, listfiller)

    # The part where everything is compiled into the TeX file
    from pylatex import (Document, Section, Subsection, Subsubsection, Command,
                         Math, Tabular, LongTable, Table, LineBreak)
    from pylatex.utils import italic, NoEscape, bold

    doc = Document()  # start a doc
    doc.packages.append(NoEscape(r'\usepackage{xcolor}'))
    doc.packages.append(NoEscape(r'\usepackage{titlesec}'))
    doc.packages.append(NoEscape(r"\usepackage{hyperref}"))
    doc.packages.append(
        NoEscape(r"\hypersetup{colorlinks=true,linkcolor=blue,urlcolor=blue}"))
    doc.packages.append(NoEscape(r"\usepackage{amsmath}"))
    doc.packages.append(NoEscape(r"\usepackage{breqn}"))
    doc.preamble.append(NoEscape(r'\definecolor{blue}{cmyk}{.93, .59, 0, 0}'))
    doc.preamble.append('')
    doc.preamble.append(NoEscape(r'\titleformat{\chapter}[display]'))
    doc.preamble.append(NoEscape(r'  {\normalfont\sffamily\huge\bfseries\color{blue}}'))
    doc.preamble.append(NoEscape(r'  {\chaptertitlename\ \thechapter}{20pt}{\Huge}'))
    doc.preamble.append(NoEscape(r'\titleformat{\section}'))
    doc.preamble.append(NoEscape(r'  {\normalfont\sffamily\Large\bfseries\color{blue}}'))
    doc.preamble.append(NoEscape(r'  {\thesection}{1em}{}'))
    doc.preamble.append(NoEscape(r'\titleformat{\subsection}'))
    doc.preamble.append(NoEscape(r'  {\normalfont\sffamily\large\bfseries\color{blue}}'))
    doc.preamble.append(NoEscape(r'  {\thesubsection}{1em}{}'))
    doc.preamble.append(NoEscape(r'\titleformat{\subsubsection}'))
    doc.preamble.append(NoEscape(r'  {\normalfont\sffamily\normalsize\bfseries\color{blue}}'))
    doc.preamble.append(NoEscape(r'  {\thesubsubsection}{1em}{}'))
    doc.append(NoEscape(r'\begin{center}'))
    doc.append(NoEscape(r'{\normalfont\sffamily\huge\bfseries SBML Model Report}\\'))
    doc.append(NoEscape(r'\vspace{5mm}'))
    doc.append(NoEscape(r'{\normalfont\sffamily\LARGE\bfseries\color{blue} Model name: ' +
                        Model_id + r'}\\'))
    doc.append(NoEscape(r'\vspace{5mm}'))
    doc.append(NoEscape(r'\large\today'))
    doc.append(NoEscape(r'\end{center}'))

    def rxn_eq(Reaction, Command=Command):
        '''
        Stitches up a list to plug into the Math function for reaction equations.
        '''
        numRe = len(Reaction[3])  # the reactants info is stored as a list in position 3
        numPr = len(Reaction[4])
        try:
            numMod = len(Reaction[6])
        except:
            numMod = 0
        arrow = []
        plus = ['+']
        Re = []
        Pr = []
        if numRe > 0:
            for i in range(0, numRe):
                if (i > 0):
                    Re = Re + plus
                # append the IDs of species that are reactants
                Re.append(Command(command='text', arguments=Reaction[3][i][0]))
        else:
            Re.append(Command(command='text', arguments=['None']))
        if numPr > 0:
            for i in range(0, numPr):
                # put in the form the Math class can interpret
                if (i > 0):
                    Pr = Pr + plus
                Pr.append(Command(command='text', arguments=Reaction[4][i][0]))
        else:
            Pr.append(Command(command='text', arguments=['None']))
        if numMod > 0:
            arg = []
            for i in range(0, numMod):
                arg.append(Reaction[6][i][0])
            arg = ", ".join(arg)
            arrow = [Command(command='xrightarrow',
                             arguments=Command(command='text', arguments=arg))]
        else:
            arrow = [Command('longrightarrow')]
        DaList = Re + arrow + Pr
        return DaList

    if lis != None:
        # NOTES -- made from the html string; can recognize:
        # <a href...>, <b>, <i>, <br/>, and treats emphasis as italic or bold.
        # There is a known issue with special characters such as '#' not being
        # interpreted right; to fix that, follow the structure below.
        leng = len(lis)
        with doc.create(Section('Notes')):

            def findOccurrences(s, ch):
                return [i for i, letter in enumerate(s) if letter == ch]

            doc.append(Command('raggedright'))
            doc.append(Command('frenchspacing'))
            for i in range(0, leng):
                if (leng < 2):
                    doc.append(lis[i])
                    continue
                if ('&apos;' in lis[i]):
                    # THIS if statement is being referenced above;
                    # &apos; is the HTML code for the apostrophe
                    lis[i] = lis[i].replace("&apos;", "'")
                if ('&amp;' in lis[i]):
                    lis[i] = lis[i].replace("&amp;", "&")
                if ('&#36;' in lis[i]):
                    lis[i] = lis[i].replace("&#36;", "$")
                if ('&#35;' in lis[i]):
                    lis[i] = lis[i].replace("&#35;", "#")
                if ('&#43;' in lis[i]):
                    lis[i] = lis[i].replace("&#43;", "+")
                if ('&#33;' in lis[i]):
                    lis[i] = lis[i].replace("&#33;", "!")
                if ('&#63;' in lis[i]):
                    lis[i] = lis[i].replace("&#63;", "?")
                if ('/' in lis[i] and 'br/>' not in lis[i] and '//' not in lis[i]
                        and len(lis[i].replace(" ", "")) < 4
                        and 'strong>' not in lis[i]):
                    # ! trying to skip every instance of </something>,
                    # assuming length 4 as the cutoff
                    continue
                elif ('br/>' in lis[i] and len(lis[i].replace(" ", "")) < 4):
                    doc.append(LineBreak())
                elif ('br/>' in lis[i]):
                    doc.append(LineBreak())
                    doc.append(lis[i].replace("br/>", ""))
                elif ('p>' in lis[i]):
                    doc.append(Command('par'))
                    doc.append(lis[i][2:len(lis[i])])
                elif ('sub>' in lis[i] and '/sub>' not in lis[i]):
                    temp = lis[i].replace("sub>", "")
                    doc.append(NoEscape("$_{\\text{" + temp + "}}$"))
                elif (('b>' in lis[i] or 'strong>' in lis[i])
                        and ('/b>' not in lis[i]) and ('/strong>' not in lis[i])
                        and ('/sub>' not in lis[i])):
                    temp = lis[i].replace("b>", "")
                    temp = temp.replace("strong>", "")
                    doc.append(bold(temp))
                elif (('i>' in lis[i] or 'em>' in lis[i])
                        and ('/i>' not in lis[i]) and ('/em>' not in lis[i])):
                    temp = lis[i].replace("i>", "")
                    temp = temp.replace("em>", "")
                    doc.append(italic(temp))
                elif (('/b>' in lis[i]) or ('/strong>' in lis[i])
                        or ('/i>' in lis[i]) or ('/em>' in lis[i])
                        or ('/sub>' in lis[i])):
                    temp = lis[i].replace("/i>", "")
                    temp = temp.replace("/em>", "")
                    temp = temp.replace("/b>", "")
                    temp = temp.replace("/strong>", "")
                    temp = temp.replace("/sub>", "")
                    doc.append(temp)
                elif ('a href=' in lis[i]):
                    t_list = lis[i].split('>')
                    pos = findOccurrences(t_list[0], '\"')
                    # ! assuming that the first two places with '"' surround the link
                    link = t_list[0][pos[0] + 1:pos[1]]
                    # ! assuming hyperlink notation:
                    # i.e. <a href="http://link.com">text the author wants seen</a>
                    doc.append(NoEscape("\\href{" + link + "}" + "{" + t_list[1] + "}"))
                else:
                    pos = findOccurrences(lis[i], '>')
                    doc.append(lis[i][pos[0] + 1:])
        del (leng)

    with doc.create(Section('Contents')):
        # summary of the contents of the sbml model
        doc.append('The number of components in this model:')
        doc.append(NoEscape(r'\\[2mm]'))
        with doc.create(Table(position='htpb')) as table1:
            doc.append(NoEscape(r'\centering'))
            tbl_cmnd = 'l|c|l|c'
            with doc.create(Tabular(tbl_cmnd, booktabs=True)) as table:
                table.add_row('Element', 'Quantity', 'Element', 'Quantity')
                table.add_hline()
                table.add_row('Compartment', str(len(Compartments)),
                              'Species', str(len(Species)))
                table.add_row('Reactions', str(len(Reactions)),
                              'Events', str(len(Events)))
                table.add_row('Global Parameters', str(len(Parameters)),
                              'Function Definitions', str(len(FunctionDefinitions)))
            table1.add_caption('Components in this model.')

    # COMPARTMENTS TABLE
    listlen = len(Compartments)  # number of rows
    sublistlen = len(Compartments[0])  # number of columns
    tbl_cmnd = ''
    tbl_cmnd = tbl_cmnd.join('c|' for i in range(0, sublistlen))
    tbl_cmnd = tbl_cmnd[:-1]  # remove the last character; no trailing vertical line
    with doc.create(Section('Compartments')):
        doc.append('Table of compartments in the model:')
        with doc.create(LongTable(tbl_cmnd, booktabs=True)) as table:
            table.add_row(('ID', 'SBO ', 'Spatial ', 'Size', 'Constant'))
            table.add_row(('', 'Term', 'Dimensions', '', ''))
            table.add_hline()
            for i in range(0, listlen):
                if math.isnan(Compartments[i][1]):
                    Compartments[i][1] = 'undefined'
                table.add_row(tuple(Compartments[i]))

    # SPECIES TABLE
    # getting info from the list
    listlen = len(Species)  # number of rows
    sublistlen = len(Species[0])  # number of columns
    tbl_cmnd = ''
    # tbl_cmnd.join('X|' for i in range(0, sublistlen))
    tbl_cmnd = tbl_cmnd.join('c|' for i in range(0, sublistlen))
    tbl_cmnd = tbl_cmnd[:-1]  # remove the last character; no trailing vertical line
    # Making a table for latex. As the simplest way of doing this, we can convert
    # the lists into tuples and just paste them into the add_row command. For
    # something more complicated, some if statements would be useful.
    with doc.create(Section('Species')):
        doc.append('Table of species in the model:')
        with doc.create(LongTable(tbl_cmnd, booktabs=True)) as table:
            table.add_row(('ID', 'Initial ', 'Only ', 'Boundary', 'Constant'))
            table.add_row(('', 'Concentration', 'Substance Units', ' Conditions', ''))
            table.add_hline()
            for i in range(0, listlen):
                if math.isnan(Species[i][1]):
                    Species[i][1] = 'undefined'
                table.add_row(tuple(Species[i]))

    # GLOBAL PARAMETERS TABLE
    listlen = len(Parameters)  # number of rows
    if (listlen < 1):
        with doc.create(Section('Parameters')):
            doc.append('The function could not identify any global Parameters in the model')
    else:
        sublistlen = len(Parameters[0])  # number of columns
        tbl_cmnd = ''
        tbl_cmnd = tbl_cmnd.join('c|' for i in range(0, sublistlen))
        tbl_cmnd = tbl_cmnd[:-1]  # remove the last character; no trailing vertical line
        with doc.create(Section('Parameters')):
            doc.append('The following table is the list of Parameters in the model.')
            with doc.create(LongTable(tbl_cmnd, booktabs=True)) as table:
                table.add_row(('ID', 'Value', 'Constant'))
                table.add_hline()
                for i in range(0, listlen):
                    table.add_row(tuple(Parameters[i]))

    # PROCESS RULES
    listlen = len(Rules)
    if (listlen >= 1):
        with doc.create(Section('Rules')):
            doc.append('Number of rules in the model: ' + str(listlen))
            for i in range(0, listlen):
                with doc.create(Subsection('Rule ' + str(i + 1) + ': ' + Rules[i][0])):
                    doc.append(Math(data=[Rules[i][0] + '=' + Rules[i][1]]))

    # PROCESS FUNCTION DEFINITIONS
    listlen = len(FunctionDefinitions)
    if (listlen >= 1):
        with doc.create(Section('Function Definitions')):
            doc.append('Number of user-defined functions in the model: ' + str(listlen))
            for i in range(0, listlen):
                latexstr = getLaTeXFromAST(FunctionDefinitions[i][1])
                with doc.create(Subsection('Function ' + str(i + 1))):
                    doc.append(Command("begin", "dmath*"))
                    doc.append(NoEscape(
                        '$$' + '\\text{' +
                        FunctionDefinitions[i][0].replace('_', '\\_') + '}\\ ('))
                    for j in range(0, len(FunctionArgList)):
                        latexarg = getLaTeXFromAST(FunctionArgList[j])
                        if j == len(FunctionArgList) - 1:
                            doc.append(NoEscape(str(latexarg.replace('_', '\\_'))))
                        else:
                            doc.append(NoEscape(latexarg.replace('_', '\\_') + ','))
                    doc.append(NoEscape('): ' + latexstr.replace('_', '\\_') + '$$'))
                    doc.append(Command("end", "dmath*"))

    # PROCESS EVENTS
    listlen = len(Events)
    if (listlen >= 1):
        with doc.create(Section('Events')):
            doc.append('Number of events defined in the model: ' + str(listlen))
            for i in range(0, listlen):
                with doc.create(Subsection('Event ' + str(i + 1) + ': ' + Events[i][0])):
                    if (len(Events[i][1]) > 0):
                        with doc.create(Subsubsection('Name', numbering=False)):
                            doc.append(Events[i][1])
                    with doc.create(Subsubsection('Trigger', numbering=False)):
                        doc.append(NoEscape('$$' + getLaTeXFromAST(Events[i][2][2]) + '$$'))
                    with doc.create(Subsubsection('Assignments', numbering=False)):
                        for j in range(0, len(Events[i][3])):
                            assTree = parseFormula(Events[i][3][j][0])
                            ass = ('$$' + getLaTeXFromAST(assTree) + '=' +
                                   getLaTeXFromAST(Events[i][3][j][1]) + '$$')
                            doc.append(NoEscape(ass))

    # PROCESS REACTIONS
    listlen = len(Reactions)  # number of rows
    with doc.create(Section('Reactions')):
        doc.append('Number of reactions in the model: ' + str(listlen))
        for i in range(0, listlen):
            with doc.create(Subsection('Reaction ' + str(i + 1) + ': ' + Reactions[i][0])):
                with doc.create(Subsubsection('Reaction equation', numbering=False)):
                    doc.append(Math(data=rxn_eq(Reaction=Reactions[i])))
                with doc.create(Subsubsection('Kinetic Law', numbering=False)):
                    m = readMathMLFromString(Reactions[i][5][0])
                    formula = getLaTeXFromAST(m)
                    formula = formula.replace('\\mathrm', '\\ \\mathrm')
                    doc.append(Command("begin", "dmath*"))
                    doc.append(NoEscape('$$v =' + formula.replace('_', '\\_') + '$$'))
                    doc.append(Command("end", "dmath*"))
                with doc.create(Subsubsection('Local Parameters')):
                    if len(Reactions[i][5][1]) != 0:
                        sublistlen = len(Reactions[i][5][1][0])
                        tbl_cmnd = ''
                        tbl_cmnd = '||' + tbl_cmnd.join('c|' for n in range(0, sublistlen)) + '|'
                        with doc.create(LongTable(tbl_cmnd, booktabs=False)) as table:
                            table.add_hline()
                            table.add_row(('ID', 'Value', 'Units', 'Constant'))
                            table.add_hline()
                            table.add_hline()
                            listleng = len(Reactions[i][5][1])
                            for j in range(0, listleng):
                                table.add_row(tuple(Reactions[i][5][1][j]))
                                table.add_hline()
                    else:
                        doc.append('No LOCAL Parameters found')

    del (Command, Document, NoEscape, Section, Subsection, italic)
    return doc.dumps()
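# A minimal, hypothetical driver for s2string above: convert an SBML file into
# the LaTeX report source and write it to disk (file names are illustrative):
latex_source = s2string('model.xml')
with open('model_report.tex', 'w') as fh:
    fh.write(latex_source)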
def report(context, json_report, json_varreport, rulegraph_img):
    config = json_report
    sample_config = json.load(open(json_report))
    var_config = json.load(open(get_config(json_varreport)))

    tex_path = os.path.abspath(
        os.path.join(sample_config["analysis"]["analysis_dir"], "delivery_report"))
    if not rulegraph_img:
        rulegraph_img = sample_config['analysis']['dag']
    os.makedirs(tex_path, exist_ok=True)

    geometry_options = {
        "head": "40pt",
        "headheight": "130pt",
        "headsep": "1cm",
        "margin": "1.5cm",
        "bottom": "1.5cm",
        "includeheadfoot": True
    }
    doc = Document(geometry_options=geometry_options)
    doc.packages.append(Package('lscape'))
    doc.packages.append(Package('longtable'))
    doc.packages.append(Package('float'))
    doc.packages.append(Package('caption', options='labelfont=bf'))
    doc.append(NoEscape(
        r'\captionsetup[table]{labelsep=space, justification=raggedright, singlelinecheck=off}'))

    # Add first page style
    first_page = PageStyle("header", header_thickness=1)

    # Add header
    with first_page.create(Head("C")) as mid_header:
        with mid_header.create(
                MiniPage(width=NoEscape(r"0.2\textwidth"), pos='c')) as logo_wrapper:
            logo_file = os.path.join(os.path.dirname(__file__), '..', 'assests/cg.png')
            logo_wrapper.append(
                StandAloneGraphic(image_options="width=50px", filename=logo_file))

        with mid_header.create(
                Tabularx("p{3cm} p{2cm} X X p{4cm} p{3cm}",
                         width_argument=NoEscape(r"0.8\textwidth"))) as mid_table:
            mid_table.add_row([MultiColumn(6, align='r', data=simple_page_number())])
            mid_table.add_row(
                [MultiColumn(6, align='c', data=MediumText("Molecular report on"))])
            mid_table.add_row(
                [MultiColumn(6, align='c', data=MediumText(get_sample_name(config)))])
            mid_table.add_empty_row()
            mid_table.add_row(['gender', "NA", " ", " ", 'Sample received:',
                               sample_config['analysis']['date']['sample_received']])
            mid_table.add_row(['tumor type', "NA", " ", " ", 'Analysis completion:',
                               sample_config['analysis']['date']['analysis_finish']])
            mid_table.add_row(['analysis type', "NA", " ", " ", 'PDF Report date:',
                               datetime.now().strftime("%Y-%m-%d %H:%M")])
            mid_table.add_row(['sample type', "NA", " ", " ", 'Delivery date', "NA"])
            mid_table.add_row(['sample origin', "NA", " ", " ", 'Analysis:',
                               r'BALSAMIC v' + sample_config['analysis']['BALSAMIC']])

    doc.preamble.append(first_page)
    # End first page

    # doc.preamble.append(
    #     Command('title',
    #             NoEscape(r'BALSAMIC v' + sample_config["analysis"]["BALSAMIC"] +
    #                      r'\\ \large Developer Report')))
    # doc.preamble.append(Command('author', 'Patient ID: ' + get_sample_name(config)))
    # doc.preamble.append(Command('date', NoEscape(r'\today')))
    # doc.append(NoEscape(r'\maketitle'))

    doc.change_document_style("header")

    with doc.create(Section(title='Analysis report', numbering=True)):
        with doc.create(Subsection('Summary of variants and variant callers',
                                   numbering=True)):
            doc.append(
                "Placeholder for text about BAM alignment metrics and variant callers. "
                "Here comes the info on reads, QC metrics, align metrics, and general "
                "sample information, preferably in table format.")
            doc.append("\n")

            summary_tables = ["TMB", "VarClass", "VarCaller", "VarCallerClass"]
            for i in summary_tables:
                shellcmd = [
                    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 "..", "..", "scripts/VariantReport.R")
                ]
                shellcmd.extend([
                    "--infile", sample_config["vcf"]["merged"]["SNV"],
                    "--genomeSize", sample_config["bed"]["genome_size"],
                    "--type", "latex",
                    "--mode", i,
                    "--outfile",
                    os.path.join(tex_path, sample_config['analysis']['sample_id'])
                ])
                print(" ".join(shellcmd))
                outTab = subprocess.check_output(shellcmd)
                doc.append(NoEscape(
                    outTab.decode('utf-8').replace("\\centering", "\\small")))
            doc.append(NoEscape(r'\normalsize'))
            doc.append(NewPage())

        with doc.create(Subsection("Summary of MVL report", numbering=True)):
            doc.append(
                "Placeholder for a general description of MVL settings: a mention of the "
                "summary pipeline and a summary of MVL settings. Gene coverage for "
                "identified genes should go here. Figures!")

            outCov = dict()
            cmd_param = defaultdict(list)
            J = defaultdict(list)
            for i in var_config["filters"]:
                cmd_param["TUMOR_DP"].append(var_config["filters"][i]["TUMOR"]["DP"])
                cmd_param["TUMOR_AD"].append(var_config["filters"][i]["TUMOR"]["AD"])
                cmd_param["TUMOR_AFmax"].append(var_config["filters"][i]["TUMOR"]["AF_max"])
                cmd_param["TUMOR_AFmin"].append(var_config["filters"][i]["TUMOR"]["AF_min"])
                cmd_param["TUMOR_inMVL"].append(var_config["filters"][i]["in_mvl"])
                cmd_param["var_type"].append(",".join(["SNP", "INDEL", "MNP", "OTHER"]))
                cmd_param["varcaller"].append(
                    ",".join(var_config["filters"][i]["variantcaller"]))
                cmd_param["ann"].append(
                    ",".join(var_config["filters"][i]["annotation"]["SNV"]) + "," +
                    ",".join(var_config["filters"][i]["annotation"]["INDEL"]))
                cmd_param["name"].append(i.replace("_", "\\_"))
                cmd_param["outfile_tex"].append(tex_path + "/" + i + ".tex")
                cmd_param["outfile_gene"].append(tex_path + "/" + i + ".genelist")
            for i in cmd_param:
                J[i] = ";".join(cmd_param[i])

            shellcmd = [
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "..", "..", "scripts/VariantReport.R")
            ]
            shellcmd.extend([
                "--infile", "'" + sample_config["vcf"]["merged"]["SNV"] + "'",
                "--dp", "'" + J["TUMOR_DP"] + "'",
                "--tumorad", "'" + J["TUMOR_AD"] + "'",
                "--afmax", "'" + J["TUMOR_AFmax"] + "'",
                "--afmin", "'" + J["TUMOR_AFmin"] + "'",
                "--inMVL", "'" + J["TUMOR_inMVL"] + "'",
                "--exclusiveSets", "TRUE",
                "--vartype", "'" + J["var_type"] + "'",
                "--varcaller", "'" + J["varcaller"] + "'",
                "--ann", "'" + J["ann"] + "'",
                "--name", "'" + J["name"] + "'",
                "--type", "latex"
            ])
            subprocess.check_output(
                " ".join(shellcmd + ["--outfile", "'" + J["outfile_tex"] + "'"]),
                shell=True)
            print(" ".join(shellcmd + ["--outfile", "'" + J["outfile_tex"] + "'"]))
            subprocess.check_output(
                " ".join(shellcmd + ["--outfile", "'" + J["outfile_gene"] + "'",
                                     "--exportGene", "T"]),
                shell=True)

            for c, i in enumerate(var_config["filters"]):
                with doc.create(Subsubsection(var_config["filters"][i]["name"],
                                              numbering=True)):
                    print(cmd_param["outfile_tex"])
                    fname = cmd_param["outfile_tex"][c]
                    if os.stat(fname).st_size > 10:
                        # get gene list
                        with open(cmd_param["outfile_gene"][c]) as myfile:
                            genes = myfile.read().replace('\n', '')
                        with open(fname, 'r') as myfile:
                            data = myfile.read()  # .replace('\n', '')
                        # doc.append(NoEscape(r'\begin{landscape}'))
                        # longtable instead of tabular makes the table span multiple
                        # pages, but the header doesn't span. Occasionally the alignment
                        # also is messed up. There must be a hidden package conflict OR
                        # general alignment issues.
                        # doc.append(NoEscape(varreport.replace("{tabular}", "{longtable}")))
                        doc.append(NoEscape(data.replace("\\centering", "\\scriptsize")))
                        for s in sample_config["bed"]["exon_cov"]:
                            shellcmd = [
                                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                             "..", "scripts/CoverageRep.R")
                            ]
                            shellcmd.extend([
                                "--infile", sample_config["bed"]["exon_cov"][s],
                                "--genename", genes,
                                "--name", s.replace("_", "\\_"),
                                "--type", "latex"
                            ])
                            outCov = subprocess.check_output(shellcmd)
                            doc.append(NoEscape(
                                outCov.decode('utf-8').replace("\\centering", "\\scriptsize")))
                        # doc.append(NoEscape(r'\end{landscape}'))
                    else:
                        doc.append("No variants were found for this filter")
            # doc.append(NoEscape(r'\normalsize'))
            doc.append(NewPage())

        with doc.create(Subsection('Coverage report')):
            for s in sample_config["bed"]["target_cov"]:
                with doc.create(Figure(position='h!')) as cov_img:
                    covplot = ".".join([os.path.join(tex_path, s), "Coverage.pdf"])
                    shellcmd = [
                        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                     "..", "..", "scripts/CoveragePlot.R")
                    ]
                    shellcmd.extend([
                        "--infile", sample_config["bed"]["target_cov"][s],
                        "--outfile", covplot,
                        "--title", s.replace("_", "\\_")
                    ])
                    subprocess.check_output(shellcmd)
                    cov_img.add_image(covplot, width='450px')
                    cov_img.add_caption('Coverage report for sample ' +
                                        s.replace("_", "\\_"))
            doc.append(NewPage())

        with doc.create(Subsection('Analysis pipeline')):
            with doc.create(Figure(position='h!')) as pipeline_img:
                pipeline_img.add_image(rulegraph_img, width='450px')
                pipeline_img.add_caption('BALSAMIC pipeline')
            doc.append(NewPage())

    with doc.create(Section(title="Appendix", numbering=True)):
        with doc.create(Subsection("MVL settings", numbering=True)):
            fmt = "p{3cm}" * (len(var_config["filters"]) + 1)
            with doc.create(Tabular(fmt)) as data_table:
                header_row1 = [""]
                for i in var_config["filters"]:
                    header_row1.append(var_config["filters"][i]["name"])
                data_table.add_hline()
                data_table.add_row(header_row1, mapper=[bold], color="lightgray")
                data_table.add_hline()
                data_table.add_empty_row()
                column = list(var_config["filters"][next(
                    iter(var_config["filters"]))]["TUMOR"].keys())
                for i in column:
                    row = [i]
                    for j in var_config["filters"]:
                        row.append(var_config["filters"][j]["TUMOR"][i])
                    data_table.add_row(row)
                row = ["MVL"]
                for i in var_config["filters"]:
                    row.append(var_config["filters"][i]["in_mvl"])
                row = ["Variantcallers"]
                for i in var_config["filters"]:
                    row.append("\n".join(var_config["filters"][i]["variantcaller"]))
                data_table.add_row(row)
                data_table.add_hline()

        with doc.create(Subsection("Bioinformatic tools in the pipeline",
                                   numbering=True)):
            doc.append("The following bioinformatic tools were used in the analysis:\n\n")
            with doc.create(Tabular("p{4cm}p{4cm}")) as data_table:
                data_table.add_hline()
                conda_env = glob.glob(
                    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 "..", "..", "conda_yaml/*.yaml"))
                pkgs = get_package_split(conda_env)
                data_table.add_row(["Package", "Version"], color="lightgray")
                data_table.add_hline()
                data_table.add_row(["BALSAMIC", sample_config['analysis']['BALSAMIC']])
                for k, v in pkgs.items():
                    data_table.add_row([k, v])
        doc.append(NewPage())

    print(tex_path)
    doc.generate_tex(os.path.join(tex_path, get_sample_name(config)))
    # doc.generate_pdf(os.path.join(tex_path, get_sample_name(config)), clean_tex=False)

    # generate_pdf doesn't run AUX files properly and ends up with incorrect
    # total page numbers, so subprocess for pdflatex is called twice instead.
    shellcmd = [
        "pdflatex", "-output-directory=" + tex_path,
        os.path.join(tex_path, get_sample_name(config)) + ".tex",
        "1>", "/dev/null"
    ]
    print(" ".join(shellcmd))
    subprocess.run(" ".join(shellcmd), shell=True)
    subprocess.run(" ".join(shellcmd), shell=True)
def main():
    try:
        parser = argparse.ArgumentParser(
            description='pyprob ' + pyprob.__version__ + ' (Analytics)',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser.add_argument('-v', '--version',
                            help='show version information', action='store_true')
        parser.add_argument('--dir',
                            help='directory for loading artifacts and saving logs',
                            default='.')
        parser.add_argument('--cuda', help='use CUDA', action='store_true')
        parser.add_argument('--device',
                            help='selected CUDA device (-1: all, 0: 1st device, 1: 2nd device, etc.)',
                            default=-1, type=int)
        parser.add_argument('--seed', help='random seed', default=123, type=int)
        parser.add_argument('--structure',
                            help='show extra information about artifact structure',
                            action='store_true')
        parser.add_argument('--saveReport',
                            help='save a full analytics report (tex and pdf)', type=str)
        parser.add_argument('--maxTraces',
                            help='maximum number of unique traces to plot in the full analytics report',
                            default=20, type=int)
        parser.add_argument('--saveLoss',
                            help='save training and validation loss history (csv)',
                            type=str)
        parser.add_argument('--saveAddresses',
                            help='save histogram of addresses (csv)', type=str)
        parser.add_argument('--saveTraceLengths',
                            help='save histogram of trace lengths (csv)', type=str)
        opt = parser.parse_args()

        if opt.version:
            print(pyprob.__version__)
            quit()

        util.set_random_seed(opt.seed)
        util.set_cuda(opt.cuda, opt.device)
        util.logger.reset()
        util.logger.log_config()

        file_name = util.file_starting_with(
            '{0}/{1}'.format(opt.dir, 'pyprob-artifact'), -1)
        util.logger.log(
            colored('Resuming previous artifact: {}'.format(file_name),
                    'blue', attrs=['bold']))
        artifact = util.load_artifact(file_name, util.cuda_enabled, util.cuda_device)
        util.logger.log(artifact.get_info())
        util.logger.log()

        if opt.structure:
            util.logger.log()
            util.logger.log(colored('Artifact structure', 'blue', attrs=['bold']))
            util.logger.log()
            util.logger.log(artifact.get_structure_str())
            util.logger.log(artifact.get_parameter_str())

        if opt.saveLoss:
            util.logger.log('Saving training and validation loss history to file: ' +
                            opt.saveLoss)
            with open(opt.saveLoss, 'w') as f:
                data = [artifact.train_history_trace, artifact.train_history_loss,
                        artifact.valid_history_trace, artifact.valid_history_loss]
                writer = csv.writer(f)
                writer.writerow(['train_trace', 'train_loss', 'valid_trace', 'valid_loss'])
                for values in zip_longest(*data):
                    writer.writerow(values)

        if opt.saveAddresses:
            util.logger.log('Saving address histogram to file: ' + opt.saveAddresses)
            with open(opt.saveAddresses, 'w') as f:
                data_count = []
                data_address = []
                data_abbrev = []
                abbrev_i = 0
                for address, count in sorted(artifact.address_histogram.items(),
                                             key=lambda x: x[1], reverse=True):
                    abbrev_i += 1
                    data_abbrev.append('A' + str(abbrev_i))
                    data_address.append(address)
                    data_count.append(count)
                data = [data_count, data_abbrev, data_address]
                writer = csv.writer(f)
                writer.writerow(['count', 'unique_address_id', 'full_address'])
                for values in zip_longest(*data):
                    writer.writerow(values)

        if opt.saveTraceLengths:
            util.logger.log('Saving trace length histogram to file: ' +
                            opt.saveTraceLengths)
            with open(opt.saveTraceLengths, 'w') as f:
                data_trace_length = []
                data_count = []
                for trace_length in artifact.trace_length_histogram:
                    data_trace_length.append(trace_length)
                    data_count.append(artifact.trace_length_histogram[trace_length])
                data = [data_trace_length, data_count]
                writer = csv.writer(f)
                writer.writerow(['trace_length', 'count'])
                for values in zip_longest(*data):
                    writer.writerow(values)

        if opt.saveReport:
            util.logger.log('Saving analytics report to files: ' + opt.saveReport +
                            '.tex and ' + opt.saveReport + '.pdf')

            iter_per_sec = artifact.total_iterations / artifact.total_training_seconds
            traces_per_sec = artifact.total_traces / artifact.total_training_seconds
            traces_per_iter = artifact.total_traces / artifact.total_iterations
            train_loss_initial = artifact.train_history_loss[0]
            train_loss_final = artifact.train_history_loss[-1]
            train_loss_change = train_loss_final - train_loss_initial
            train_loss_change_per_sec = train_loss_change / artifact.total_training_seconds
            train_loss_change_per_iter = train_loss_change / artifact.total_iterations
            train_loss_change_per_trace = train_loss_change / artifact.total_traces
            valid_loss_initial = artifact.valid_history_loss[0]
            valid_loss_final = artifact.valid_history_loss[-1]
            valid_loss_change = valid_loss_final - valid_loss_initial
            valid_loss_change_per_sec = valid_loss_change / artifact.total_training_seconds
            valid_loss_change_per_iter = valid_loss_change / artifact.total_iterations
            valid_loss_change_per_trace = valid_loss_change / artifact.total_traces

            sys.stdout.write('Generating report...                                \r')
            sys.stdout.flush()

            geometry_options = {'tmargin': '1.5cm', 'lmargin': '1cm',
                                'rmargin': '1cm', 'bmargin': '1.5cm'}
            doc = Document('basic', geometry_options=geometry_options)
            doc.preamble.append(NoEscape(r'\usepackage[none]{hyphenat}'))
            doc.preamble.append(NoEscape(r'\usepackage{float}'))
            # doc.preamble.append(NoEscape(r'\renewcommand{\familydefault}{\ttdefault}'))
            doc.preamble.append(Command('title', 'Inference Compilation Analytics'))
            doc.preamble.append(
                Command('date',
                        NoEscape(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))))
            doc.append(NoEscape(r'\maketitle'))
            # doc.append(NoEscape(r'\small'))

            with doc.create(Section('Current system')):
                with doc.create(Tabularx('ll')) as table:
                    table.add_row(('pyprob version', pyprob.__version__))
                    table.add_row(('PyTorch version', torch.__version__))

            # doc.append(NoEscape(r'\newpage'))
            with doc.create(Section('Artifact')):
                with doc.create(Subsection('File')):
                    with doc.create(Tabularx('ll')) as table:
                        table.add_row(('File name', file_name))
                        file_size = '{:,}'.format(os.path.getsize(file_name))
                        table.add_row(('File size', file_size + ' Bytes'))
                        table.add_row(('Created', artifact.created))
                        table.add_row(('Modified', artifact.modified))
                        table.add_row(('Updates to file', artifact.updates))
                with doc.create(Subsection('Training system')):
                    with doc.create(Tabularx('ll')) as table:
                        table.add_row(('pyprob version', artifact.code_version))
                        table.add_row(('PyTorch version', artifact.pytorch_version))
                        table.add_row(('Trained on', artifact.trained_on))
                with doc.create(Subsection('Neural network')):
                    with doc.create(Tabularx('ll')) as table:
                        table.add_row(('Trainable parameters',
                                       '{:,}'.format(artifact.num_params_history_num_params[-1])))
                        table.add_row(('Softmax boost', artifact.softmax_boost))
                        table.add_row(('Dropout', artifact.dropout))
                        table.add_row(('Standardize inputs', artifact.standardize_observes))
                    with doc.create(Figure(position='H')) as plot:
                        fig = plt.figure(figsize=(10, 4))
                        ax = plt.subplot(111)
                        ax.plot(artifact.num_params_history_trace,
                                artifact.num_params_history_num_params)
                        plt.xlabel('Training traces')
                        plt.ylabel('Number of parameters')
                        plt.grid()
                        fig.tight_layout()
                        plot.add_plot(width=NoEscape(r'\textwidth'))
                        plot.add_caption('Number of parameters.')
                    for m_name, m in artifact.named_modules():
                        if not ('.' in m_name or m_name == ''):
                            doc.append(NoEscape(r'\newpage'))
                            with doc.create(Subsubsection(m_name)):
                                doc.append(str(m))
                                for p_name, p in m.named_parameters():
                                    if not 'bias' in p_name:
                                        with doc.create(Figure(position='H')) as plot:
                                            fig = plt.figure(figsize=(10, 10))
                                            ax = plt.subplot(111)
                                            plt.imshow(np.transpose(
                                                util.weights_to_image(p), (1, 2, 0)),
                                                interpolation='none')
                                            plt.axis('off')
                                            plot.add_plot(width=NoEscape(r'\textwidth'))
                                            plot.add_caption(m_name + '_' + p_name)

            doc.append(NoEscape(r'\newpage'))
            with doc.create(Section('Training')):
                with doc.create(Tabularx('ll')) as table:
                    table.add_row(('Total training time', '{0}'.format(
                        util.days_hours_mins_secs(artifact.total_training_seconds))))
                    table.add_row(('Total training traces',
                                   '{:,}'.format(artifact.total_traces)))
                    table.add_row(('Traces / s', '{:,.2f}'.format(traces_per_sec)))
                    table.add_row(('Traces / iteration', '{:,.2f}'.format(traces_per_iter)))
                    table.add_row(('Iterations', '{:,}'.format(artifact.total_iterations)))
                    table.add_row(('Iterations / s', '{:,.2f}'.format(iter_per_sec)))
                    table.add_row(('Optimizer', artifact.optimizer))
                    table.add_row(('Validation set size', artifact.valid_size))
                with doc.create(Subsection('Training loss')):
                    with doc.create(Tabularx('ll')) as table:
                        table.add_row(('Initial loss', '{:+.6e}'.format(train_loss_initial)))
                        table.add_row(('Final loss', '{:+.6e}'.format(train_loss_final)))
                        table.add_row(('Loss change / s',
                                       '{:+.6e}'.format(train_loss_change_per_sec)))
                        table.add_row(('Loss change / iteration',
                                       '{:+.6e}'.format(train_loss_change_per_iter)))
                        table.add_row(('Loss change / trace',
                                       '{:+.6e}'.format(train_loss_change_per_trace)))
                with doc.create(Subsection('Validation loss')):
                    with doc.create(Tabularx('ll')) as table:
                        table.add_row(('Initial loss', '{:+.6e}'.format(valid_loss_initial)))
                        table.add_row(('Final loss', '{:+.6e}'.format(valid_loss_final)))
                        table.add_row(('Loss change / s',
                                       '{:+.6e}'.format(valid_loss_change_per_sec)))
                        table.add_row(('Loss change / iteration',
                                       '{:+.6e}'.format(valid_loss_change_per_iter)))
                        table.add_row(('Loss change / trace',
                                       '{:+.6e}'.format(valid_loss_change_per_trace)))
                with doc.create(Figure(position='H')) as plot:
                    fig = plt.figure(figsize=(10, 6))
                    ax = plt.subplot(111)
                    ax.plot(artifact.train_history_trace, artifact.train_history_loss,
                            label='Training')
                    ax.plot(artifact.valid_history_trace, artifact.valid_history_loss,
                            label='Validation')
                    ax.legend()
                    plt.xlabel('Training traces')
                    plt.ylabel('Loss')
                    plt.grid()
                    fig.tight_layout()
                    plot.add_plot(width=NoEscape(r'\textwidth'))
                    plot.add_caption('Loss plot.')

            doc.append(NoEscape(r'\newpage'))
            with doc.create(Section('Traces')):
                with doc.create(Tabularx('ll')) as table:
                    table.add_row(('Total training traces',
                                   '{:,}'.format(artifact.total_traces)))
                with doc.create(Subsection('Distributions encountered')):
                    with doc.create(Tabularx('ll')) as table:
                        num_distributions = len(artifact.one_hot_distribution.keys())
                        table.add_row(('Number of distributions', num_distributions))
                        table.add_empty_row()
                        for distribution in artifact.one_hot_distribution.keys():
                            table.add_row((distribution, ''))
                with doc.create(Subsection('Unique addresses encountered')):
                    with doc.create(Tabularx('lX')) as table:
                        num_addresses = len(artifact.one_hot_address.keys())
                        table.add_row(('Number of addresses', num_addresses))
                        address_collisions = max(
                            0, num_addresses - artifact.one_hot_address_dim)
                        table.add_row(('Address collisions', address_collisions))
                        table.add_empty_row()
                    doc.append('\n')
                    with doc.create(LongTable('llp{16cm}')) as table:  #
table.add_empty_row() table.add_row('Count', 'ID', 'Unique address') table.add_hline() address_to_abbrev = {} abbrev_to_address = {} abbrev_i = 0 sorted_addresses = sorted( artifact.address_histogram.items(), key=lambda x: x[1], reverse=True) plt_addresses = [] plt_counts = [] address_to_count = {} address_count_total = 0 for address, count in sorted_addresses: abbrev_i += 1 abbrev = 'A' + str(abbrev_i) address_to_abbrev[address] = abbrev abbrev_to_address[abbrev] = address plt_addresses.append(abbrev) plt_counts.append(count) address_to_count[abbrev] = count address_count_total += count table.add_row(('{:,}'.format(count), abbrev, FootnoteText(address))) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) plt_x = range(len(plt_addresses)) ax.bar(plt_x, plt_counts) plt.xticks(plt_x, plt_addresses) plt.xlabel('Unique address ID') plt.ylabel('Count') plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Histogram of address hits.') with doc.create(Subsection('Lengths')): with doc.create(Tabularx('ll')) as table: table.add_row( ('Min trace length', '{:,}'.format(artifact.trace_length_min))) table.add_row( ('Max trace length', '{:,}'.format(artifact.trace_length_max))) s = 0 total_count = 0 for trace_length in artifact.trace_length_histogram: count = artifact.trace_length_histogram[ trace_length] s += trace_length * count total_count += count trace_length_mean = s / total_count table.add_row(('Mean trace length', '{:.2f}'.format(trace_length_mean))) with doc.create(Figure(position='H')) as plot: plt_lengths = [ i for i in range(0, artifact.trace_length_max + 1) ] plt_counts = [ artifact.trace_length_histogram[i] if i in artifact.trace_length_histogram else 0 for i in range(0, artifact.trace_length_max + 1) ] fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) ax.bar(plt_lengths, plt_counts) plt.xlabel('Length') plt.ylabel('Count') # plt.yscale('log') plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Histogram of trace lengths (of all traces used during training).' 
) with doc.create(Subsection('Unique traces encountered')): with doc.create(Tabularx('ll')) as table: table.add_row( ('Unique traces encountered', '{:,}'.format( len(artifact.trace_examples_histogram)))) table.add_row( ('Unique trace memory capacity', '{:,}'.format(artifact.trace_examples_limit))) table.add_row( ('Unique traces rendered in detail', '{:,}'.format( min(len(artifact.trace_examples_histogram), opt.maxTraces)))) doc.append('\n') with doc.create(LongTable('lllp{16cm}')) as table: # table.add_empty_row() table.add_row('Count', 'ID', 'Len.', 'Unique trace') table.add_hline() trace_to_abbrev = {} abbrev_to_trace = {} abbrev_to_addresses = {} abbrev_i = 0 sorted_traces = sorted( artifact.trace_examples_histogram.items(), key=lambda x: x[1], reverse=True) plt_traces = [] plt_counts = [] trace_to_count = {} trace_count_total = 0 for trace, count in sorted_traces: abbrev_i += 1 abbrev = 'T' + str(abbrev_i) trace_to_abbrev[trace] = abbrev abbrev_to_trace[abbrev] = trace abbrev_to_addresses[abbrev] = list( map(lambda x: address_to_abbrev[x], artifact.trace_examples_addresses[trace])) trace_addresses = abbrev_to_addresses[abbrev] trace_addresses_repetitions = util.pack_repetitions( trace_addresses) plt_traces.append(abbrev) plt_counts.append(count) trace_to_count[trace] = count trace_count_total += count length = len( artifact.trace_examples_addresses[trace]) table.add_row( ('{:,}'.format(count), abbrev, '{:,}'.format(length), FootnoteText('-'.join([ a + 'x' + str(i) if i > 1 else a for a, i in trace_addresses_repetitions ])))) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) plt_x = range(len(plt_traces)) ax.bar(plt_x, plt_counts) plt.xticks(plt_x, plt_traces) plt.xlabel('Unique trace ID') plt.ylabel('Count') plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Histogram of unique traces.') with doc.create(Figure(position='H')) as plot: master_trace_pairs = {} transition_count_total = 0 for trace, count in sorted_traces: ta = abbrev_to_addresses[trace_to_abbrev[trace]] for left, right in zip(ta, ta[1:]): if (left, right) in master_trace_pairs: master_trace_pairs[(left, right)] += count else: master_trace_pairs[(left, right)] = count transition_count_total += count fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) master_graph = pydotplus.graphviz.Dot( graph_type='digraph', rankdir='LR') for p, w in master_trace_pairs.items(): nodes = master_graph.get_node(p[0]) if len(nodes) > 0: n0 = nodes[0] else: n0 = pydotplus.Node(p[0]) master_graph.add_node(n0) nodes = master_graph.get_node(p[1]) if len(nodes) > 0: n1 = nodes[0] else: n1 = pydotplus.Node(p[1]) master_graph.add_node(n1) master_graph.add_edge( pydotplus.Edge(n0, n1, weight=w)) for node in master_graph.get_nodes(): node.set_color('gray') node.set_fontcolor('gray') for edge in master_graph.get_edges(): edge.set_color('gray') master_graph_annotated = pydotplus.graphviz.graph_from_dot_data( master_graph.to_string()) for node in master_graph_annotated.get_nodes(): color = util.rgb_to_hex( util.rgb_blend( (1, 1, 1), (1, 0, 0), address_to_count[node.obj_dict['name']] / address_count_total)) node.set_style('filled') node.set_fillcolor(color) node.set_color('black') node.set_fontcolor('black') for edge in master_graph_annotated.get_edges(): (left, right) = edge.obj_dict['points'] count = master_trace_pairs[(left, right)] edge.set_label(count) color = util.rgb_to_hex( (1.5 * (count / transition_count_total), 0, 0)) edge.set_color(color) png_str 
= master_graph_annotated.create_png( prog=['dot', '-Gsize=15', '-Gdpi=600']) bio = BytesIO() bio.write(png_str) bio.seek(0) img = np.asarray(mpimg.imread(bio)) plt.imshow(util.crop_image(img), interpolation='bilinear') plt.axis('off') plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Succession of unique address IDs (accumulated over all traces).' ) for trace, count in sorted_traces[:opt.maxTraces]: trace = trace_to_abbrev[trace] doc.append(NoEscape(r'\newpage')) with doc.create(Subsubsection('Unique trace ' + trace)): sys.stdout.write( 'Rendering unique trace {0}... \r' .format(trace)) sys.stdout.flush() addresses = len(address_to_abbrev) trace_addresses = abbrev_to_addresses[trace] with doc.create(Tabularx('ll')) as table: table.add_row( FootnoteText('Count'), FootnoteText('{:,}'.format(count))) table.add_row( FootnoteText('Length'), FootnoteText('{:,}'.format( len(trace_addresses)))) doc.append('\n') im = np.zeros((addresses, len(trace_addresses))) for i in range(len(trace_addresses)): address = trace_addresses[i] address_i = plt_addresses.index(address) im[address_i, i] = 1 truncate = 100 for col_start in range(0, len(trace_addresses), truncate): col_end = min(col_start + truncate, len(trace_addresses)) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(20 * ( (col_end + 4 - col_start) / truncate), 4)) ax = plt.subplot(111) # ax.imshow(im,cmap=plt.get_cmap('Greys')) sns.heatmap(im[:, col_start:col_end], cbar=False, linecolor='lightgray', linewidths=.5, cmap='Greys', yticklabels=plt_addresses, xticklabels=np.arange( col_start, col_end)) plt.yticks(rotation=0) fig.tight_layout() plot.add_plot( width=NoEscape(r'{0}\textwidth'.format( (col_end + 4 - col_start) / truncate)), placement=NoEscape(r'\raggedright')) with doc.create(Figure(position='H')) as plot: pairs = {} for left, right in zip(trace_addresses, trace_addresses[1:]): if (left, right) in pairs: pairs[(left, right)] += 1 else: pairs[(left, right)] = 1 fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) graph = pydotplus.graphviz.graph_from_dot_data( master_graph.to_string()) trace_address_to_count = {} for address in trace_addresses: if address in trace_address_to_count: trace_address_to_count[address] += 1 else: trace_address_to_count[address] = 1 for p, w in pairs.items(): left_node = graph.get_node(p[0])[0] right_node = graph.get_node(p[1])[0] edge = graph.get_edge(p[0], p[1])[0] color = util.rgb_to_hex( util.rgb_blend( (1, 1, 1), (1, 0, 0), trace_address_to_count[p[0]] / len(trace_addresses))) left_node.set_style('filled') left_node.set_fillcolor(color) left_node.set_color('black') left_node.set_fontcolor('black') color = util.rgb_to_hex( util.rgb_blend( (1, 1, 1), (1, 0, 0), trace_address_to_count[p[0]] / len(trace_addresses))) right_node.set_style('filled') right_node.set_fillcolor(color) right_node.set_color('black') right_node.set_fontcolor('black') (left, right) = edge.obj_dict['points'] edge.set_label(w) color = util.rgb_to_hex( (1.5 * (w / len(trace_addresses)), 0, 0)) edge.set_color(color) png_str = graph.create_png( prog=['dot', '-Gsize=30', '-Gdpi=600']) bio = BytesIO() bio.write(png_str) bio.seek(0) img = np.asarray(mpimg.imread(bio)) plt.imshow(util.crop_image(img), interpolation='bilinear') plt.axis('off') plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Succession of unique address IDs (for one trace of type ' + trace + ').') with doc.create(Tabularx('lp{16cm}')) as table: trace_addresses_repetitions = util.pack_repetitions( trace_addresses) table.add_row( 
FootnoteText('Trace'), FootnoteText('-'.join([ a + 'x' + str(i) if i > 1 else a for a, i in trace_addresses_repetitions ]))) doc.generate_pdf(opt.saveReport, clean_tex=False) sys.stdout.write( ' \r' ) sys.stdout.flush() except KeyboardInterrupt: util.logger.log('Stopped') except Exception: traceback.print_exc(file=sys.stdout) sys.exit(0)
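The three --save* options above share one export pattern: parallel columns of unequal length are transposed into CSV rows with itertools.zip_longest. A condensed sketch of that pattern; write_columns_csv and its example arguments are illustrative names, not pyprob API.

import csv
from itertools import zip_longest

def write_columns_csv(path, header, *columns):
    # zip_longest pads the shorter columns with None, so ragged column
    # lists still yield rectangular CSV rows.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for row in zip_longest(*columns):
            writer.writerow(row)

# e.g. write_columns_csv('loss.csv', ['train_trace', 'train_loss'],
#                        artifact.train_history_trace, artifact.train_history_loss)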
def __detailsFZN(self, doc, options, optPerSol, fnames, maxtime, bestever): coords = {} objs = {} for o in options: coords[o] = [] objs[o] = [] objs['syb'] = [] pol = 'SAT' presec = "" prevsubsec = "" section = None subsection = None # Third problem per problem k = 0 for fname in fnames: parts = fname.split("+") solutions = optPerSol[fname] if parts[0] != presec: presec = parts[0] if k > 0: self.__addTimePlots(doc, options, coords) for o in options: coords[o].clear() k = 0 if len(objs) > 0: self.__addObjPlots(doc, options, objs, pol) for o in objs.keys(): objs[o].clear() section = Section('%s' % (presec)) # .replace("_", "\_"))) doc.append(section) print("create section: " + presec) if parts[1] != prevsubsec: prevsubsec = parts[1] subsection = Subsection('%s' % (prevsubsec)) # .replace("_", "\_"))) section.append(subsection) print("create subsection: " + prevsubsec) if len(parts) > 2: subsubsection = Subsubsection( '%s' % (parts[2])) # .replace("_", "\_"))) subsection.append(subsubsection) print("create subsubsection: " + parts[2]) else: subsubsection = Subsubsection( '%s' % (parts[1])) # .replace("_", "\_"))) subsection.append(subsubsection) print("create subsubsection: " + parts[1]) pol = solutions[0][3] if solutions[0][3] == 'SAT': solutions.sort(key=lambda x: (x[3], x[1])) table = Tabular('l|r|l|r|r|r') subsubsection.append(table) table.add_hline() table.add_row(("Config.", 'Status', "#Sol", 'Time(sec)', 'Build(sec)', 'Nodes')) table.add_hline() for i in range(0, len(solutions)): table.add_row( (solutions[i][6], solutions[i][5], solutions[i][0], solutions[i][1], solutions[i][7], solutions[i][2])) coords[solutions[i][6]].append((k, solutions[i][1])) table.add_hline() table.add_hline() # add syb if fname in bestever: table.add_row("syb", bestever[fname][0], "--", "--", "--", "--") table.add_hline() else: # sort for MIN type = 'MIN' solutions.sort(key=lambda x: (x[3], x[4], x[1])) best = solutions[0][4] # check first row and last row if solutions[0][3] == 'MAX' or solutions[len(solutions) - 1][3] == 'MAX': solutions.sort(key=lambda x: (x[3], -x[4], x[1])) best = solutions[0][4] type = 'MAX' table = Tabular('l|r|l|r|r|r|r') subsubsection.append(table) table.add_hline() table.add_row(("Config.", type, 'Status', "#Sol", 'Time(sec)', 'Build(sec)', 'Nodes')) table.add_hline() for i in range(0, len(solutions)): table.add_row( (solutions[i][6], solutions[i][4], solutions[i][5], solutions[i][0], solutions[i][1], solutions[i][7], solutions[i][2])) if solutions[i][4] == best: coords[solutions[i][6]].append((k, solutions[i][1])) else: coords[solutions[i][6]].append((k, maxtime)) if int(solutions[i][0]) > 0: objs[solutions[i][6]].append((k, solutions[i][4])) table.add_hline() table.add_hline() # add syb if fname in bestever: if len(bestever[fname]) > 1: table.add_row("syb", bestever[fname][1], bestever[fname][0], "--", "--", "--", "--") objs['syb'].append((k, bestever[fname][1])) else: table.add_row("syb", "--", bestever[fname][0], "--", "--", "--", "--") table.add_hline() k += 1 if k > 0: self.__addTimePlots(doc, options, coords) for o in options: coords[o].clear() k = 0 if len(objs) > 0: self.__addObjPlots(doc, options, objs, pol) for o in objs.keys(): objs[o].clear()
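Stripped of its tables and plots, the sectioning logic in __detailsFZN walks '+'-separated benchmark names and opens a new Section, Subsection, or Subsubsection whenever the corresponding name component changes. A minimal sketch of just that logic, with illustrative fnames values:

from pylatex import Document, Section, Subsection, Subsubsection

doc = Document()
section = subsection = None
prev_sec = prev_sub = None
for fname in ['suite+group+caseA', 'suite+group+caseB', 'suite+other+caseC']:
    parts = fname.split('+')
    if parts[0] != prev_sec:
        prev_sec, prev_sub = parts[0], None
        section = Section(parts[0])
        doc.append(section)
    if parts[1] != prev_sub:
        prev_sub = parts[1]
        subsection = Subsection(parts[1])
        section.append(subsection)
    # problems without a third component reuse the second one as the title
    subsection.append(Subsubsection(parts[2] if len(parts) > 2 else parts[1]))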
def save_report(model, file_name, detailed_traces=2): print('Saving analytics report to {}.tex and {}.pdf'.format( file_name, file_name)) inference_network = model._inference_network iter_per_sec = inference_network._total_train_iterations / inference_network._total_train_seconds traces_per_sec = inference_network._total_train_traces / inference_network._total_train_seconds traces_per_iter = inference_network._total_train_traces / inference_network._total_train_iterations train_loss_initial = inference_network._history_train_loss[0] train_loss_final = inference_network._history_train_loss[-1] train_loss_change = train_loss_final - train_loss_initial train_loss_change_per_sec = train_loss_change / inference_network._total_train_seconds train_loss_change_per_iter = train_loss_change / inference_network._total_train_iterations train_loss_change_per_trace = train_loss_change / inference_network._total_train_traces valid_loss_initial = inference_network._history_valid_loss[0] valid_loss_final = inference_network._history_valid_loss[-1] valid_loss_change = valid_loss_final - valid_loss_initial valid_loss_change_per_sec = valid_loss_change / inference_network._total_train_seconds valid_loss_change_per_iter = valid_loss_change / inference_network._total_train_iterations valid_loss_change_per_trace = valid_loss_change / inference_network._total_train_traces sys.stdout.write( 'Generating report... \r') sys.stdout.flush() geometry_options = { 'tmargin': '1.5cm', 'lmargin': '1cm', 'rmargin': '1cm', 'bmargin': '1.5cm' } doc = Document('basic', geometry_options=geometry_options) doc.preamble.append(NoEscape(r'\usepackage[none]{hyphenat}')) doc.preamble.append(NoEscape(r'\usepackage{float}')) # doc.preamble.append(NoEscape(r'\renewcommand{\familydefault}{\ttdefault}')) doc.preamble.append(Command('title', 'pyprob analytics: ' + model.name)) doc.preamble.append( Command( 'date', NoEscape(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))) doc.append(NoEscape(r'\maketitle')) # doc.append(NoEscape(r'\small')) print('Analytics: Current system') with doc.create(Section('Current system', numbering=False)): with doc.create(Tabularx('ll')) as table: table.add_row(('pyprob version', __version__)) table.add_row(('PyTorch version', torch.__version__)) # doc.append(NoEscape(r'\newpage')) print('Analytics: Inference network') with doc.create(Section('Inference network', numbering=False)): print('Analytics: Inference network.File') with doc.create(Section('File')): with doc.create(Tabularx('ll')) as table: # table.add_row(('File name', file_name)) # file_size = '{:,}'.format(os.path.getsize(file_name)) # table.add_row(('File size', file_size + ' Bytes')) table.add_row(('Created', inference_network._created)) table.add_row(('Modified', inference_network._modified)) table.add_row(('Updates to file', inference_network._updates)) print('Analytics: Inference network.Training') with doc.create(Section('Training')): with doc.create(Tabularx('ll')) as table: table.add_row( ('pyprob version', inference_network._pyprob_version)) table.add_row( ('PyTorch version', inference_network._torch_version)) table.add_row(('Trained on', inference_network._trained_on)) table.add_row(('Total training time', '{0}'.format( util.days_hours_mins_secs_str( inference_network._total_train_seconds)))) table.add_row( ('Total training traces', '{:,}'.format(inference_network._total_train_traces))) table.add_row(('Traces / s', '{:,.2f}'.format(traces_per_sec))) table.add_row( ('Traces / iteration', '{:,.2f}'.format(traces_per_iter))) 
table.add_row( ('Iterations', '{:,}'.format(inference_network._total_train_iterations))) table.add_row( ('Iterations / s', '{:,.2f}'.format(iter_per_sec))) table.add_row(('Optimizer', inference_network._optimizer_type)) table.add_row(('Validation set size', inference_network._valid_batch.length)) print('Analytics: Inference network.Training loss') with doc.create(Subsection('Training loss')): with doc.create(Tabularx('ll')) as table: table.add_row( ('Initial loss', '{:+.6e}'.format(train_loss_initial))) table.add_row( ('Final loss', '{:+.6e}'.format(train_loss_final))) table.add_row(('Loss change / s', '{:+.6e}'.format(train_loss_change_per_sec))) table.add_row(('Loss change / iteration', '{:+.6e}'.format(train_loss_change_per_iter))) table.add_row(('Loss change / trace', '{:+.6e}'.format(train_loss_change_per_trace))) print('Analytics: Inference network.Validation loss') with doc.create(Subsection('Validation loss')): with doc.create(Tabularx('ll')) as table: table.add_row( ('Initial loss', '{:+.6e}'.format(valid_loss_initial))) table.add_row( ('Final loss', '{:+.6e}'.format(valid_loss_final))) table.add_row(('Loss change / s', '{:+.6e}'.format(valid_loss_change_per_sec))) table.add_row(('Loss change / iteration', '{:+.6e}'.format(valid_loss_change_per_iter))) table.add_row(('Loss change / trace', '{:+.6e}'.format(valid_loss_change_per_trace))) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 6)) ax = plt.subplot(111) ax.plot(inference_network._history_train_loss_trace, inference_network._history_train_loss, label='Training') ax.plot(inference_network._history_valid_loss_trace, inference_network._history_valid_loss, label='Validation') ax.legend() plt.xlabel('Training traces') plt.ylabel('Loss') plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Loss plot.') print('Analytics: Inference network.Neural network modules') with doc.create(Section('Neural network modules')): with doc.create(Tabularx('ll')) as table: table.add_row( ('Total trainable parameters', '{:,}'.format(inference_network._history_num_params[-1]))) # table.add_row(('Softmax boost', inference_network.softmax_boost)) # table.add_row(('Dropout', inference_network.dropout)) # table.add_row(('Standardize inputs', inference_network.standardize_observes)) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 4)) ax = plt.subplot(111) ax.plot(inference_network._history_num_params_trace, inference_network._history_num_params) plt.xlabel('Training traces') plt.ylabel('Number of parameters') plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Number of parameters.') doc.append(NoEscape(r'\newpage')) print( 'Analytics: Inference network.Neural network modules.All modules' ) with doc.create(Subsection('All modules')): doc.append(str(inference_network)) for m_name, m in inference_network.named_modules(): if (m_name != ''): regex = r'(sample_embedding_layer\(\S*\)._)|(proposal_layer\(\S*\)._)|(_observe_embedding_layer.)|(_lstm)' if len(list(re.finditer(regex, m_name))) > 0: # if ('_observe_embedding_layer.' in m_name) or ('sample_embedding_layer.' in m_name) or ('proposal_layer.' 
in m_name): doc.append(NoEscape(r'\newpage')) with doc.create(Subsubsection(m_name)): doc.append(str(m)) for p_name, p in m.named_parameters(): if not 'bias' in p_name: with doc.create( Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 10)) ax = plt.subplot(111) plt.imshow(np.transpose( util.weights_to_image(p), (1, 2, 0)), interpolation='none') plt.axis('off') plot.add_plot( width=NoEscape(r'\textwidth')) plot.add_caption(m_name + '_' + p_name) # doc.append(NoEscape(r'\newpage')) # print('Analytics: Inference network.Neural network modules.Address embeddings') # with doc.create(Subsection('Address embeddings')): # for p_name, p in inference_network.named_parameters(): # if ('address_embedding' in p_name): # with doc.create(Figure(position='H')) as plot: # fig = plt.figure(figsize=(10,10)) # ax = plt.subplot(111) # plt.imshow(np.transpose(util.weights_to_image(p),(1,2,0)), interpolation='none') # plt.axis('off') # plot.add_plot(width=NoEscape(r'\textwidth')) # plot.add_caption(FootnoteText(p_name.replace('::', ':: '))) gc.collect() doc.append(NoEscape(r'\newpage')) print('Analytics: Inference network.Traces') with doc.create(Section('Traces')): with doc.create(Tabularx('ll')) as table: table.add_row( ('Total training traces', '{:,}'.format(inference_network._total_train_traces))) print( 'Analytics: Inference network.Traces.Distributions encountered' ) with doc.create(Subsection('Distributions encountered')): with doc.create(Tabularx('ll')) as table: # print([v[2] for v in inference_network._address_stats.values()]) distributions = set([ v[2] for v in inference_network._address_stats.values() ]) num_distributions = len(distributions) table.add_row( ('Number of distributions', num_distributions)) table.add_empty_row() for distribution in distributions: table.add_row((distribution, '')) print('Analytics: Inference network.Traces.Addresses encountered') with doc.create(Subsection('Addresses encountered')): with doc.create(Tabularx('lX')) as table: num_addresses_all = len( inference_network._address_stats.keys()) table.add_row(('Number of addresses', num_addresses_all)) num_addresses_controlled = len([ k for k, v in inference_network._address_stats.items() if v[3] ]) num_addresses_replaced = len([ k for k, v in inference_network._address_stats.items() if v[3] and v[4] ]) num_addresses_observed = len([ k for k, v in inference_network._address_stats.items() if v[5] ]) table.add_row( (TextColor('red', 'Number of addresses (controlled)'), TextColor('red', num_addresses_controlled))) table.add_row((TextColor('green', 'Number of addresses (replaced)'), TextColor('green', num_addresses_replaced))) table.add_row((TextColor('blue', 'Number of addresses (observed)'), TextColor('blue', num_addresses_observed))) table.add_row( ('Number of addresses (uncontrolled)', num_addresses_all - num_addresses_controlled - num_addresses_observed)) table.add_empty_row() doc.append('\n') with doc.create(LongTable('llllllp{12cm}')) as table: # table.add_empty_row() table.add_row(FootnoteText('Count'), FootnoteText('ID'), FootnoteText('Distrib.'), FootnoteText('Ctrl.'), FootnoteText('Replace'), FootnoteText('Obs.'), FootnoteText('Address')) table.add_hline() # address_to_abbrev = {} # abbrev_to_address = # abbrev_i = 0 # sorted_addresses = sorted(inference_network.address_histogram.items(), key=lambda x:x[1], reverse=True) plt_all_addresses = [] plt_all_counts = [] plt_all_colors = [] plt_controlled_addresses = [] plt_controlled_counts = [] plt_controlled_colors = [] address_id_to_count = {} address_id_to_color 
= {} address_id_count_total = 0 for address, vals in inference_network._address_stats.items( ): address = address.replace('::', ':: ') count = vals[0] address_id = vals[1] distribution = vals[2] control = vals[3] replace = vals[4] observed = vals[5] plt_all_addresses.append(address_id) plt_all_counts.append(1 if replace else count) address_id_to_count[address_id] = count address_id_count_total += count if control: if replace: color = 'green' plt_controlled_counts.append(1) else: color = 'red' plt_controlled_counts.append(count) plt_controlled_addresses.append(address_id) plt_controlled_colors.append(color) elif observed: color = 'blue' plt_controlled_addresses.append(address_id) plt_controlled_colors.append(color) plt_controlled_counts.append(count) else: color = 'black' table.add_row( (TextColor(color, FootnoteText('{:,}'.format(count))), TextColor(color, FootnoteText(address_id)), TextColor(color, FootnoteText(distribution)), TextColor(color, FootnoteText(control)), TextColor(color, FootnoteText(replace)), TextColor(color, FootnoteText(observed)), TextColor(color, FootnoteText(address)))) plt_all_colors.append(color) address_id_to_color[address_id] = color gc.collect() with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) plt_x = range(len(plt_all_addresses)) ax.bar(plt_x, plt_all_counts, color=plt_all_colors) plt.xticks(plt_x, plt_all_addresses) plt.xlabel('Address ID') plt.ylabel('Count') # plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Histogram of all addresses. Red: controlled, green: replaced, black: uncontrolled, blue: observed.' ) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) plt_x = range(len(plt_controlled_addresses)) ax.bar(plt_x, plt_controlled_counts, color=plt_controlled_colors) plt.xticks(plt_x, plt_controlled_addresses) plt.xlabel('Address ID') plt.ylabel('Count') # plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Histogram of controlled and observed addresses. Red: controlled, green: replaced, blue: observed.' 
) gc.collect() print('Analytics: Inference network.Traces.Trace lengths') with doc.create(Subsection('Trace lengths')): with doc.create(Tabularx('ll')) as table: trace_lengths_controlled = [ v[3] for v in inference_network._trace_stats.values() ] trace_lengths_controlled_min = min( trace_lengths_controlled) trace_lengths_controlled_max = max( trace_lengths_controlled) trace_lengths_all = [ v[2] for v in inference_network._trace_stats.values() ] trace_lengths_all_min = min(trace_lengths_all) trace_lengths_all_max = max(trace_lengths_all) s_controlled = 0 s_all = 0 total_count = 0 for _, v in inference_network._trace_stats.items(): trace_length_controlled = v[3] trace_length_all = v[2] count = v[0] s_controlled += trace_length_controlled * count total_count += count s_all += trace_length_all * count trace_length_controlled_mean = s_controlled / total_count trace_length_all_mean = s_all / total_count table.add_row(('Trace length min', '{:,}'.format(trace_lengths_all_min))) table.add_row(('Trace length max', '{:,}'.format(trace_lengths_all_max))) table.add_row(('Trace length mean', '{:.2f}'.format(trace_length_all_mean))) table.add_row( ('Controlled trace length min', '{:,}'.format(trace_lengths_controlled_min))) table.add_row( ('Controlled trace length max', '{:,}'.format(trace_lengths_controlled_max))) table.add_row( ('Controlled trace length mean', '{:.2f}'.format(trace_length_controlled_mean))) with doc.create(Figure(position='H')) as plot: plt_counter = dict(Counter(trace_lengths_all)) plt_lengths = [ i for i in range(0, trace_lengths_all_max + 1) ] plt_counts = [ plt_counter[i] if i in plt_counter else 0 for i in range(0, trace_lengths_all_max + 1) ] fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) ax.bar(plt_lengths, plt_counts, width=trace_lengths_all_max / 500.) plt.xlabel('Length') plt.ylabel('Count') # plt.yscale('log') # plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Histogram of trace lengths.') with doc.create(Figure(position='H')) as plot: plt_counter = dict(Counter(trace_lengths_controlled)) plt_lengths = [ i for i in range(0, trace_lengths_controlled_max + 1) ] plt_counts = [ plt_counter[i] if i in plt_counter else 0 for i in range(0, trace_lengths_controlled_max + 1) ] fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) ax.bar(plt_lengths, plt_counts) plt.xlabel('Length') plt.ylabel('Count') # plt.yscale('log') # plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Histogram of controlled trace lengths.') gc.collect() print( 'Analytics: Inference network.Traces.Unique traces encountered' ) with doc.create(Subsection('Unique traces encountered')): detailed_traces = min(len(inference_network._trace_stats), detailed_traces) with doc.create(Tabularx('ll')) as table: table.add_row( ('Unique traces encountered', '{:,}'.format(len(inference_network._trace_stats)))) table.add_row(('Unique traces rendered in detail', '{:,}'.format(detailed_traces))) doc.append('\n') with doc.create(LongTable('llllp{15cm}')) as table: # table.add_empty_row() table.add_row(FootnoteText('Count'), FootnoteText('ID'), FootnoteText('Len.'), FootnoteText('Ctrl. 
len.'), FootnoteText('Unique trace')) table.add_hline() plt_traces = [] plt_counts = [] for trace_str, vals in inference_network._trace_stats.items( ): count = vals[0] trace_id = vals[1] length_all = vals[2] length_controlled = vals[3] addresses_controlled = vals[4] addresses_controlled_str = ' '.join( addresses_controlled) plt_traces.append(trace_id) plt_counts.append(count) table.add_row( (FootnoteText('{:,}'.format(count)), FootnoteText(trace_id), FootnoteText('{:,}'.format(length_all)), FootnoteText('{:,}'.format(length_controlled)), FootnoteText(addresses_controlled_str))) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) plt_x = range(len(plt_traces)) ax.bar(plt_x, plt_counts) plt.xticks(plt_x, plt_traces) plt.xlabel('Trace ID') plt.ylabel('Count') # plt.grid() fig.tight_layout() plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption('Histogram of unique traces.') with doc.create(Figure(position='H')) as plot: sorted_trace_stats = OrderedDict( sorted(dict(inference_network._trace_stats).items(), key=lambda x: x[1], reverse=True)) master_trace_pairs = {} transition_count_total = 0 for trace_str, vals in sorted_trace_stats.items(): count = vals[0] ta = vals[4] for left, right in zip(ta, ta[1:]): if (left, right) in master_trace_pairs: master_trace_pairs[(left, right)] += count else: master_trace_pairs[(left, right)] = count transition_count_total += count fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) master_graph = pydotplus.graphviz.Dot(graph_type='digraph', rankdir='LR') transition_count_max = 0 for p, count in master_trace_pairs.items(): if count > transition_count_max: transition_count_max = count nodes = master_graph.get_node(p[0]) if len(nodes) > 0: n0 = nodes[0] else: n0 = pydotplus.Node(p[0]) master_graph.add_node(n0) nodes = master_graph.get_node(p[1]) if len(nodes) > 0: n1 = nodes[0] else: n1 = pydotplus.Node(p[1]) master_graph.add_node(n1) master_graph.add_edge( pydotplus.Edge(n0, n1, weight=count)) for node in master_graph.get_nodes(): node.set_color('gray') node.set_fontcolor('gray') for edge in master_graph.get_edges(): edge.set_color('gray') master_graph_annotated = pydotplus.graphviz.graph_from_dot_data( master_graph.to_string()) for node in master_graph_annotated.get_nodes(): # color = util.rgb_to_hex(util.rgb_blend((1, 1, 1), (1, 0, 0), address_id_to_count[node.obj_dict['name']] / address_id_count_total)) address_id = node.obj_dict['name'] node.set_style('filled') node.set_fillcolor(address_id_to_color[address_id]) node.set_color('black') node.set_fontcolor('black') for edge in master_graph_annotated.get_edges(): (left, right) = edge.obj_dict['points'] count = master_trace_pairs[(left, right)] edge.set_label(count) # color = util.rgb_to_hex((1.5*(count/transition_count_total), 0, 0)) edge.set_color('black') edge.set_penwidth(2.5 * count / transition_count_max) png_str = master_graph_annotated.create_png( prog=['dot', '-Gsize=90', '-Gdpi=600']) bio = BytesIO() bio.write(png_str) bio.seek(0) img = np.asarray(mpimg.imread(bio)) plt.imshow(util.crop_image(img), interpolation='bilinear') plt.axis('off') plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Succession of controlled addresses (accumulated over all traces). Red: controlled, green: replaced, blue: observed.' 
) for trace_str, vals in OrderedDict( islice(sorted_trace_stats.items(), 0, detailed_traces)).items(): count = vals[0] trace_id = vals[1] doc.append(NoEscape(r'\newpage')) with doc.create(Subsubsection('Unique trace ' + trace_id)): sys.stdout.write( 'Rendering unique trace {0}... \r' .format(trace_id)) sys.stdout.flush() addresses = len(plt_controlled_addresses) trace_addresses = vals[4] with doc.create(Tabularx('ll')) as table: table.add_row(FootnoteText('Count'), FootnoteText('{:,}'.format(count))) table.add_row( FootnoteText('Controlled length'), FootnoteText('{:,}'.format( len(trace_addresses)))) doc.append('\n') im = np.zeros((addresses, len(trace_addresses))) for i in range(len(trace_addresses)): address = trace_addresses[i] address_i = plt_controlled_addresses.index(address) im[address_i, i] = 1 truncate = 100 for col_start in range(0, len(trace_addresses), truncate): col_end = min(col_start + truncate, len(trace_addresses)) with doc.create(Figure(position='H')) as plot: fig = plt.figure(figsize=(20 * ( (col_end + 4 - col_start) / truncate), 4)) ax = plt.subplot(111) # ax.imshow(im,cmap=plt.get_cmap('Greys')) sns.heatmap( im[:, col_start:col_end], cbar=False, linecolor='lightgray', linewidths=.5, cmap='Greys', yticklabels=plt_controlled_addresses, xticklabels=np.arange(col_start, col_end)) plt.yticks(rotation=0) fig.tight_layout() plot.add_plot( width=NoEscape(r'{0}\textwidth'.format( (col_end + 4 - col_start) / truncate)), placement=NoEscape(r'\raggedright')) with doc.create(Figure(position='H')) as plot: pairs = {} for left, right in zip(trace_addresses, trace_addresses[1:]): if (left, right) in pairs: pairs[(left, right)] += 1 else: pairs[(left, right)] = 1 fig = plt.figure(figsize=(10, 5)) ax = plt.subplot(111) graph = pydotplus.graphviz.graph_from_dot_data( master_graph.to_string()) trace_address_to_count = {} for address in trace_addresses: if address in trace_address_to_count: trace_address_to_count[address] += 1 else: trace_address_to_count[address] = 1 transition_count_max = 0 for p, count in pairs.items(): if count > transition_count_max: transition_count_max = count left_node = graph.get_node(p[0])[0] right_node = graph.get_node(p[1])[0] edge = graph.get_edge(p[0], p[1])[0] # color = util.rgb_to_hex(util.rgb_blend((1,1,1), (1,0,0), trace_address_to_count[p[0]] / len(trace_addresses))) left_node.set_style('filled') left_node.set_fillcolor( address_id_to_color[p[0]]) left_node.set_color('black') left_node.set_fontcolor('black') # color = util.rgb_to_hex(util.rgb_blend((1,1,1), (1,0,0), trace_address_to_count[p[0]] / len(trace_addresses))) right_node.set_style('filled') right_node.set_fillcolor( address_id_to_color[p[1]]) right_node.set_color('black') right_node.set_fontcolor('black') # (left, right) = edge.obj_dict['points'] edge.set_label(count) # color = util.rgb_to_hex((1.5*(count/len(trace_addresses)),0,0)) edge.set_color('black') edge.set_penwidth(2.5 * count / transition_count_max) png_str = graph.create_png( prog=['dot', '-Gsize=90', '-Gdpi=600']) bio = BytesIO() bio.write(png_str) bio.seek(0) img = np.asarray(mpimg.imread(bio)) plt.imshow(util.crop_image(img), interpolation='bilinear') plt.axis('off') plot.add_plot(width=NoEscape(r'\textwidth')) plot.add_caption( 'Succession of controlled addresses (for one trace of type ' + trace_id + '). Red: controlled, green: replaced, blue: observed.' 
) with doc.create(Tabularx('lp{16cm}')) as table: table.add_row( FootnoteText('Trace'), FootnoteText(' '.join(trace_addresses))) doc.generate_pdf(file_name, clean_tex=False) sys.stdout.write( ' \r') sys.stdout.flush()
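Every plot in save_report is embedded through the same pylatex idiom: draw with matplotlib, then hand the active figure to Figure.add_plot, which saves it and includes it in the document. A self-contained sketch under the same assumptions the function makes (position='H' needs the float package, loaded here in the preamble; the bar-chart data are illustrative):

import matplotlib
matplotlib.use('Agg')  # render off-screen, as during report generation
import matplotlib.pyplot as plt
from pylatex import Document, Figure, NoEscape

doc = Document('figure_demo')
doc.preamble.append(NoEscape(r'\usepackage{float}'))
with doc.create(Figure(position='H')) as plot:
    fig = plt.figure(figsize=(10, 5))
    plt.bar(range(3), [5, 2, 7])
    plt.xlabel('Trace ID')
    plt.ylabel('Count')
    fig.tight_layout()
    plot.add_plot(width=NoEscape(r'\textwidth'))  # snapshots the current figure
    plot.add_caption('Histogram of unique traces.')
doc.generate_pdf('figure_demo', clean_tex=False)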
def subsubsection(doc, title): return doc.create(Subsubsection(NoEscape(title)))
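Because doc.create returns a context manager, this helper composes like the inline with-blocks in the snippets above; note that NoEscape means the caller is responsible for passing valid LaTeX. A usage sketch with an illustrative title:

with subsubsection(doc, r'Unique trace \texttt{T1}') as sec:
    sec.append('Details for this trace.')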