Ejemplo n.º 1
0
def gen_table(df, doc):
    '''
    Build the institutions long/short positions table through ``doc``.

    inputs:
    ------
    - df: DataFrame; one table row is emitted per entry of ``df.index``
    - doc: document object whose ``create`` method is used as a context
      manager to open the tabular
    returns:
    - None (the table is produced as a side effect)
    '''
    titles = [
        'Institutions', 'Contracts', 'Long', 'Ch.', 'Contracts', 'Short',
        'Ch.', 'Net Position', 'Net Ch.',
    ]
    units = ['', '(k)', '(%)', '(k)', '(k)', '(%)', '(k)', '(k)', '(k)']
    layout = pl.Tabular('l | c c c | c c c | c c',
                        col_space='0.15cm',
                        pos=['h'])
    with doc.create(layout) as table:
        table.add_hline()
        # Both header lines are bold on a white background.
        for heading in (titles, units):
            table.add_row(heading, mapper=[bold], color='white')
        table.add_hline()
    # Data rows alternate lightgray / white, starting with lightgray.
    shade = 'white'
    for label in df.index:
        shade = 'lightgray' if shade == 'white' else 'white'
        cells = _fill_row(label, df)
        table.add_row(cells, color=shade)
    table.add_hline()
Ejemplo n.º 2
0
def gen_table(df, doc):
    '''
    Build the DI / DV01 table through ``doc``.

    inputs:
    ------
    - df: DataFrame; one table row is emitted per entry of ``df.index``
    - doc: document object whose ``create`` method is used as a context
      manager to open the tabular
    returns:
    - None (the table is produced as a side effect)
    '''
    header_rows = (
        ['DI', 'Last Trade', 'Contracts', 'DV01',
         'DV01-5', 'DV01-21', 'DV01-63', 'DV01-126'],
        ['', '', '', '', 'Bus. Days',
         'Bus. Days', 'Bus. Days', 'Bus. Days'],
        ['', '(%)', '(k)', '(k)', '(Ave. (k))',
         '(Ave.(k))', '(Ave.(k))', '(Ave.(k))'],
    )
    layout = pl.Tabular('l c c c c c c c', col_space='0.15cm', pos=['h'])
    with doc.create(layout) as table:
        table.add_hline()
        for heading in header_rows:
            table.add_row(heading, mapper=[bold], color='white')
        table.add_hline()
    shade = 'white'
    for label in df.index:
        # Anything other than white (lightgray, or the gray left behind by
        # a "Market" row) is followed by white; white is followed by lightgray.
        shade = 'lightgray' if shade == 'white' else 'white'
        cells = _fill_row(label, df)
        if label == "Market":
            # The "Market" row is always drawn in gray.
            shade = 'gray'
        table.add_row(cells, color=shade)
    table.add_hline()
Ejemplo n.º 3
0
def table() -> pylatex.Table:
    """
    Build a LaTeX table listing the distortion-model coefficients.

    Coefficient names and the unit column are read from the distortion model
    of ``rays_output``; the numeric ``x'`` and ``y'`` columns are read from
    the distortion model of ``rays_output_relative``.

    :return: a ``pylatex.Table`` wrapping a centered four-column tabular
    """
    optics_single = optics.as_designed_single_channel()
    model_distortion = optics_single.rays_output.distortion.model()
    model_distortion_relative = (
        optics_single.rays_output_relative.distortion.model())

    def fmt_coeff(coeff: u.Quantity):
        # Only the bare value is formatted: it is re-wrapped as a
        # dimensionless quantity before being handed to the formatter.
        return kgpy.format.quantity(
            a=coeff.value * u.dimensionless_unscaled,
            scientific_notation=True,
            digits_after_decimal=2,
        )

    result = pylatex.Table()
    with result.create(pylatex.Center()) as centering:
        with centering.create(pylatex.Tabular('ll|rr')) as tabular:
            tabular.escape = False
            # Raw string: the previous non-raw literal relied on the invalid
            # escape sequence "\m", which newer Python versions warn about.
            # The resulting text is unchanged.
            tabular.append(
                r"\multicolumn{2}{l}{Coefficient} & $x'$ & $y'$\\")
            tabular.add_hline()
            for c, name in enumerate(model_distortion.x.coefficient_names):
                tabular.add_row([
                    f'{name}',
                    f'({model_distortion.x.coefficients[c].unit:latex_inline})',
                    fmt_coeff(
                        model_distortion_relative.x.coefficients[c].squeeze()),
                    fmt_coeff(
                        model_distortion_relative.y.coefficients[c].squeeze()),
                ])
    return result
Ejemplo n.º 4
0
def gen_table(df, doc):
    '''
    Build the item table (one column per ``df`` column) through ``doc``.

    inputs:
    ------
    - df: DataFrame; its columns follow the 'Item' header cell and one table
      row is emitted per entry of ``df.index``
    - doc: document object whose ``create`` method is used as a context
      manager to open the tabular
    returns:
    - None (the table is produced as a side effect)
    '''
    # Index labels whose rows are rendered in bold.
    bold_items = ('Core Inflation (avg.)', "Foods at Home",
                  "Free Prices", "Monitored Prices", "Diffusion")
    with doc.create(pl.Tabular('l | c | c | c', col_space='0.2cm', pos=['b'])) as table:
        header_row = ['Item'] + list(df.columns)
        table.add_hline()
        table.add_row(header_row, mapper=[bold])
        # Two consecutive rules are emitted below the header.
        table.add_hline()
        table.add_hline()
    # The rows are added without a colour argument, so the alternating
    # `color` variable the previous version toggled here was dead code and
    # has been dropped.
    for ind in df.index:
        row = _fill_row(ind, df)
        if ind in bold_items:
            table.add_row(row, mapper=[bold])
        else:
            table.add_row(row)

    table.add_hline()
Ejemplo n.º 5
0
def produce_header_footer():
    """
    Build the "header" page style used for the report.

    The left header holds the image returned by ``get_image()``, the right
    header holds today's date (``YYYY-MM-DD``), and the centered footer holds
    a three-column table with two bold notice lines in its middle column.

    :return: the populated ``pl.PageStyle`` object
    """
    page_style = pl.PageStyle("header", header_thickness=0.1)

    # Left header: logo image
    logo_path = get_image()
    with page_style.create(pl.Head("L")) as left_head:
        logo_graphic = pl.StandAloneGraphic(image_options="width=110px",
                                            filename=logo_path)
        left_head.append(logo_graphic)

    # Right header: issue date
    with page_style.create(pl.Head("R")):
        issued_on = datetime.today().strftime('%Y-%m-%d')
        page_style.append("Date Report Issued: " + issued_on)

    # Centered footer: notices sit in the middle cell of an 'lcr' table
    notices = (
        'Data interpretation guidelines can be found in RDIMS document ID: 10401305',
        'This report was generated with OLC AutoROGA v1.2',
    )
    with page_style.create(pl.Foot("C")):
        with page_style.create(pl.Tabular('lcr')) as table:
            for notice in notices:
                table.add_row('', bold(notice), '')
    return page_style
Ejemplo n.º 6
0
    def add_page3(self):
        """
        Append the "Influence of mesocrystal height" page to ``self.m_doc``.

        A narrow minipage receives a two-column table built from the ``data``
        text defined outside this method: a line with exactly one ':' becomes
        a two-cell row, a line without ':' becomes a rule followed by a row
        holding the whole line, and a line with more than one ':' is skipped.
        A wide minipage then includes ``meso.png``, and a page break ends the
        page.
        """
        report = self.m_doc
        report.append("Influence of mesocrystal height")
        report.append(pl.VerticalSpace("2cm"))
        report.append("\n")

        table_box = pl.MiniPage(width=r"0.2\textwidth",
                                height=r"0.25\textwidth",
                                content_pos='t')
        with report.create(table_box):
            entries = data.split("\n")
            with report.create(pl.Tabular('l l', row_height=0.8)) as table:
                cell_style = [mono, tiny]
                for entry in entries:
                    fields = entry.split(":")
                    print(fields)
                    field_count = len(fields)
                    if field_count == 2:
                        key, value = fields
                        table.add_row(key, value, mapper=cell_style)
                    elif field_count == 1:
                        table.add_hline()
                        table.add_row((entry, " "), mapper=cell_style)
            report.append("\n")

        figure_box = pl.MiniPage(width=r"0.8\textwidth",
                                 height=r"0.25\textwidth",
                                 content_pos='t')
        with report.create(figure_box):
            picture = pl.Command('includegraphics',
                                 options='scale=0.8',
                                 arguments='meso.png')
            report.append(picture)
            report.append("\n")

        report.append(pl.NewPage())
0
    def sum_sample(self):
        """Summarize the analysis data and plots in one PDF report per sample.

        For every index of ``self.infiles`` an article document is assembled
        with a "Genome coverage" section (a two-column table built from the
        first data row of ``self.stats_file`` plus the coverage plot) and a
        "Comparison to reference genome" section (the mismatch plot). The
        document is then passed to ``generate_pdf`` with the file path
        ``<self.outputdir>/<self.sample_info[i]>.Report``.
        """
        for i in range(len(self.infiles)):
            sample = self.sample[i]
            self.logger.info(
                '\nCreating pdf for sample {} results.\n'.format(sample))
            geometry_options = {
                'tmargin': '3cm',
                'bmargin': '3cm',
                'rmargin': '3cm',
                'lmargin': '3cm'
            }
            doc = px.Document(documentclass='article',
                              geometry_options=geometry_options)
            doc.preamble.append(
                px.Command('title', 'Sequencing results for sample ' + sample))
            doc.preamble.append(px.Command('date', px.NoEscape(r'\today')))
            doc.append(px.NoEscape(r'\maketitle'))

            with doc.create(px.Section('Genome coverage')):
                # Table of coverage statistics: one row per column of the
                # stats file, pairing the column name with its first value.
                with doc.create(px.Subsection('Coverage results')):
                    with doc.create(px.Tabular(table_spec='l  l')) as table:
                        with open(self.stats_file, 'r') as stats:
                            table.add_hline()
                            stats_data = pd.read_csv(stats, sep='\t')
                            first_row = stats_data.iloc[0]
                            for num, column in enumerate(stats_data.columns):
                                # Explicit positional access: integer keys
                                # passed to Series[...] on a label-indexed
                                # row are deprecated in recent pandas.
                                table.add_row([column, first_row.iloc[num]])
                            table.add_hline()
                # Coverage plot
                with doc.create(px.Figure(position='htb!')) as plot:
                    plot.add_image(self.cov_plot[i],
                                   width=px.NoEscape(r'\linewidth'))
                    plot.add_caption(
                        'Genome coverage for sample ' + sample +
                        '. Calculated using samtools depth with zero-coverage positions included.'
                    )
            # Mismatch plot comparing the sample to the reference
            with doc.create(px.Section('Comparison to reference genome')):
                with doc.create(px.Figure(position='htb!')) as plot:
                    plot.add_image(self.basefreq_plot + '_' + sample + '.png',
                                   width=px.NoEscape(r'\linewidth'))
                    plot.add_caption(
                        'Mismatch fraction per position for sample ' +
                        sample +
                        '. Calculated compared to reference {}.'.format(
                            self.config['folder_locations']['ref_fasta']))

            doc.generate_pdf(
                filepath=os.path.join(self.outputdir, self.sample_info[i] +
                                      '.Report'))
            self.logger.info(
                '\nDone creating pdf for sample {} results.\n'.format(sample))
Ejemplo n.º 8
0
def table() -> pylatex.Table:
    """
    Build the instrument requirements/capabilities table.

    Each row gives a parameter, its requirement, the science driver and the
    corresponding capability. All cells are LaTeX source (macros and
    cross-references), so escaping is switched off on the tabular.

    :return: a ``pylatex.Table`` positioned with ``!htb``, captioned and
        labelled ``table:scireq``
    """
    heading = ['Parameter', 'Requirement', 'Science Driver', 'Capabilities']
    rows = [
        [
            r'Spectral line',
            r'\OV',
            r'\EEs',
            r'\OVion, \MgXion, \HeIion, Figure~\ref{fig:bunch}',
        ],
        [
            r'Spectral sampling',
            r'\spectralResolutionRequirement',
            r'Broadening from \MHD\ waves',
            r'\dispersionDoppler, Table~\ref{table:prescription}',
        ],
        [
            r'Spatial resolution',
            r'\angularResolutionRequirement (\spatialResolutionRequirement)',
            r'\EEs',
            r'\spatialResolutionTotal, Table~\ref{table:errorBudget}',
        ],
        [
            r'\SNRShort',
            r'\snrRequirement\ (\CHShort)',
            r'\MHD\ waves in \CHShort',
            r'\StackedCoronalHoleSNR\ ($\NumExpInStack \times \text{\detectorExposureLength}$ exp.), '
            r'Table~\ref{table:counts}',
        ],
        [
            r'Cadence',
            r'\cadenceRequirement',
            r'Torsional waves',
            r'\detectorExposureLength\ eff., Section~\ref{subsec:SensitivityandCadence}',
        ],
        [
            r'Observing time',
            r'\observingTimeRequirement',
            r'\EEs',
            r'\SI{270}{\second}, Section~\ref{sec:MissionProfile}',
        ],
        [
            r'\FOV\ diameter',
            r'\fovRequirement',
            r'Span \QSShort, \ARShort, and limb',
            r'\fov, Table~\ref{table:prescription}',
        ],
    ]

    result = pylatex.Table(position='!htb')
    # NOTE(review): presumably selects the starred ``table*`` environment;
    # confirm against the pylatex version in use.
    result._star_latex_name = True
    with result.create(pylatex.Center()) as centering:
        with centering.create(pylatex.Tabular(table_spec='llll')) as tabular:
            tabular.escape = False
            tabular.add_row(heading)
            tabular.add_hline()
            for row in rows:
                tabular.add_row(row)
    # Caption spelling corrected ("capabilties" -> "capabilities").
    result.add_caption(
        pylatex.NoEscape(
            r"""\ESIS\ instrument requirements and capabilities. Note that MTF exceeds the Rayleigh criterion of 0.109."""
        ))
    result.append(kgpy.latex.Label('table:scireq'))
    return result
Ejemplo n.º 9
0
 def add_tabular(self, array):
     """
     Render ``array`` as a tabular and append a spacer after it.

     The column spec is one ``l`` column, a vertical rule, then one ``c``
     column for every remaining cell of the first row. A horizontal rule is
     drawn below the first row only.

     :param array: indexable sequence of rows; ``array[0]`` fixes the
         column count
     """
     centred_columns = len(array[0]) - 1
     column_spec = 'l|' + 'c' * centred_columns
     with self.create(tex.Tabular(column_spec)) as table:
         for position, cells in enumerate(array):
             table.add_row(cells)
             if position == 0:
                 # Separate the first row from the rows that follow.
                 table.add_hline()
     spacer = tex.utils.NoEscape("\\\\~\\\\")
     self.append(spacer)
Ejemplo n.º 10
0
 def leak_overview_table(self, leaks_docs):
     """
     Add a five-column overview table to ``self.doc``, then a page break.

     Three ``LeakOverview`` objects are built from ``leaks_docs``: one
     without a second argument, one with ``DataLeak`` and one with
     ``CFLeak``. For each, the table receives its header row, a rule, its
     overview row and an empty row.

     :param leaks_docs: passed unchanged to every ``LeakOverview``
     """
     overall = LeakOverview(leaks_docs)
     data_only = LeakOverview(leaks_docs, DataLeak)
     cf_only = LeakOverview(leaks_docs, CFLeak)
     overviews = (overall, data_only, cf_only)
     with self.doc.create(pylatex.Tabular("l l l l l")) as data_table:
         for overview in overviews:
             data_table.add_row(overview.print_header())
             data_table.add_hline()
             data_table.add_row(overview.print_overview())
             data_table.add_empty_row()
     self.doc.append(NewPage())
Ejemplo n.º 11
0
def gen_latex_version_of_table(
    latex_table_dir: Path,
    content: List[str],
    table_name: str,
    branch_name: str = "dev",
) -> Path:
    """Convert the markdown table that ends ``content`` into a LaTeX tabular.

    The trailing run of lines starting with ``|`` is taken as the table. Its
    first line supplies the column names, its second line is skipped, and
    every later line becomes a table row. A last column named "links"
    (case-insensitive) is dropped. Cells matching the mean pattern
    (``<sub><sup>MEAN<sub>``) are rewritten as ``$MEAN_{\\pm STD}$`` using the
    single ``<sub>(STD)</sub>`` match found in the same cell.

    Args:
        latex_table_dir: directory for the output file; created if missing.
        content: lines of text whose final lines form the markdown table.
        table_name: stem of the output file, written as ``<table_name>.txt``.
        branch_name: branch segment used in the returned link path.

    Returns:
        ``Path("/../../tree/<branch_name>/<dest_path>")``.

    Raises:
        AssertionError: if the last line is not a table line, if the table
            has fewer than three lines, or if a cell with a mean does not
            contain exactly one mean and one std match.
    """
    msg = "Expected latexify tag to be placed directly following a table"
    assert content[-1].startswith("|"), msg
    # Count the trailing table lines with a plain loop. The previous
    # ``[...].index(False)`` raised ValueError when *every* line of
    # ``content`` was a table line.
    num_table_rows = 0
    for line in reversed(content):
        if not line.startswith("|"):
            break
        num_table_rows += 1
    assert num_table_rows > 2, "expected at least three table rows (including header)"
    markdown_table = content[-num_table_rows:]
    col_names = [x.strip() for x in markdown_table[0].split("|")[1:-1]]

    # remove last column of links
    remove_links = col_names[-1].lower() == "links"
    if remove_links:
        col_names.pop()

    # Compiled once instead of being looked up for every cell.
    mean_pattern = re.compile(r"<sub><sup>([0-9]+[.][0-9]+)<sub>")
    std_pattern = re.compile(
        r"<sub>\(([0-9]+[.][0-9]+e*-*[0-9]*|[a-z]+|)\)<\/sub>")

    cols = "|".join(["c"] * len(col_names))
    table = pylatex.Tabular(cols)
    table.add_hline()
    table.add_hline()
    table.add_row(tuple(col_names))
    table.add_hline()
    # markdown_table[1] (the line right after the header) is skipped.
    for row in markdown_table[2:]:
        tokens = [x.strip() for x in row.split("|")[1:-1]]
        if remove_links:
            tokens.pop()
        row_contents = []
        for token in tokens:
            mean_strs = mean_pattern.findall(token)
            if mean_strs:
                assert len(mean_strs) == 1, "expected a unique mean specifier"
                std_strs = std_pattern.findall(token)
                assert len(std_strs) == 1, "expected a unique std specifier"
                mean_str, std_str = mean_strs[0], std_strs[0]
                raw_str = "$" + mean_str + r"_{\pm" + std_str + r"}$"
                token = pylatex.NoEscape(raw_str)
            row_contents.append(token)
        table.add_row(tuple(row_contents))
        table.add_hline()
    latex_str = table.dumps()
    latex_table_dir.mkdir(exist_ok=True, parents=True)
    dest_path = latex_table_dir / f"{table_name}.txt"
    with open(dest_path, "w") as f:
        f.write(latex_str)
    github_project_root = f"/../../tree/{branch_name}/"
    markdown_link = Path(f"{github_project_root}{dest_path}")
    return markdown_link
Ejemplo n.º 12
0
def _confusion_matrices(data):

    for layer in data:
        matrix = np.zeros((2,3))
        for i_true in [1, 2, 3]:
            subdata = data[layer][np.where(data[layer]['Output_number_true'] == i_true)]
            for i_nn in [1, 2, 3]:
                nclas = np.count_nonzero(subdata['Output_number_estimated'] == i_nn)
                if i_nn == 1:
                    matrix[0, i_true - 1] = float(nclas) / subdata.shape[0]
                else:
                    matrix[1, i_true - 1] += float(nclas) / subdata.shape[0]

        table = latex.Tabular('r r c c c')
        table.add_row('', '', '', latex.utils.bold('True'), '')
        table.add_row(
            '',
            '',
            latex.utils.bold('1'),
            latex.utils.bold('2'),
            latex.utils.bold('3')
        )
        table.add_hline()
        table.append(latex.Command('addlinespace', options='3mm'))
        for i in range(2):
            if i == 0:
                cell0 = latex.MultiRow(
                    # 3,
                    2,
                    data=latex.Command(
                        'rotatebox',
                        ['90', latex.utils.bold('Network')],
                        'origin=c'
                    )
                )
                n = '1'
            else:
                cell0 = ''
                n = '2/3'
            row = [cell0, latex.utils.bold(n)]
            for j in range(3):
                row.append('{:.2f}'.format(matrix[i, j]))
            table.add_row(row)
            if i == 0:
                table.append(latex.Command('addlinespace', options='4mm'))
        path = 'confusion_{}.tex'.format(layer)
        with open(path, 'w') as wfile:
            wfile.write(table.dumps())
            print '  -> wrote ' + path
Ejemplo n.º 13
0
 def writePriceTableTwoIntoLatexDocument(self, data_table):
     """
     Add this price, rendered as a nested four-column table, to ``data_table``.

     The nested table holds a header row, a rule, this object's
     code/unit/description row with an empty last cell, and whatever
     ``self.components`` writes into it. It is then added to ``data_table``
     as a single-cell row.

     :param data_table: table object receiving the nested table via ``add_row``
     """
     nested_data_table = pylatex.Tabular('l r p{5.5cm} r')
     # Header
     nested_data_table.add_row(
         [u'Código', u'Ud.', u'Descripción', u'Importe'])
     nested_data_table.add_hline()
     # This price's own line; the last column is left empty here.
     own_row = self.getLtxCodeUnitDescription()
     own_row.append('')
     nested_data_table.add_row(own_row)
     # Decomposition
     self.components.writePriceTableTwoIntoLatexDocument(
         nested_data_table, True)
     # XXX Here cumulated percentages.
     data_table.add_row([nested_data_table])
Ejemplo n.º 14
0
def gen_table(df):
    '''
    Build the three-column index table through ``doc``.

    ``doc`` is not a parameter: it is looked up in the enclosing scope when
    the function runs.

    inputs:
    ------
    - df: DataFrame; one table row is emitted per entry of ``df.index``
    returns:
    - None (the table is produced as a side effect)
    '''
    layout = pl.Tabular('l  r  r  ', col_space='0.15cm', row_height=1.2)
    with doc.create(layout) as table:
        table.add_hline()
        table.add_row(["Index", "", "+/-"], mapper=[bold])
        table.add_hline()
    for label in df.index:
        cells = _fill_row(label, df)
        table.add_row(cells)
    table.add_hline()
Ejemplo n.º 15
0
def gen_table(df, doc):
    '''
    Build the ten-column bond table through ``doc``.

    The two header lines depend on the last '/'-separated component of the
    document's ``default_filepath``: one set is used for "NTN-B", another for
    every other name.

    inputs:
    ------
    - df: DataFrame; one table row is emitted per entry of ``df.index``
    - doc: document object providing ``create`` and a ``default_filepath``
      entry in its ``__dict__``
    returns:
    - None (the table is produced as a side effect)
    '''
    ntnb_headers = (
        ['Expiry', 'Yield', 'Daily Ch.', 'BE', 'Ch. BE', 'FRA',
         'FRA BE', 'Duration', 'BRL', 'Carry'],
        ['', '', '(%)', '', '', '', '(Daily)', '(Macaulay)', 'PV01',
         '(bps)'],
    )
    default_headers = (
        ['Expiry', 'Yield', 'Daily Ch.', 'Spread', 'Daily Ch.', '%DI',
         'Daily Ch.', 'Duration', 'BRL', 'DI'],
        ['', '', '(%)', '', 'Daily', '', '(%)', '(Macaulay)', 'PV01',
         'Conts.'],
    )
    layout = pl.Tabular('l|c c|c c|c c|c c c', col_space='0.15cm', pos=['h'])
    with doc.create(layout) as table:
        report_name = doc.__dict__['default_filepath'].split('/')[-1]
        if report_name == "NTN-B":
            top_line, bottom_line = ntnb_headers
        else:
            top_line, bottom_line = default_headers
        table.add_hline()
        table.add_row(top_line, mapper=[bold], color='white')
        table.add_row(bottom_line, mapper=[bold], color='white')
        table.add_hline()
    # Data rows alternate lightgray / white, starting with lightgray.
    shade = 'white'
    for label in df.index:
        shade = 'lightgray' if shade == 'white' else 'white'
        cells = _fill_row(label, df)
        table.add_row(cells, color=shade)
    table.add_hline()
Ejemplo n.º 16
0
 def writePriceJustification(self, data_table):
     """
     Add this price's justification, as a nested six-column table, to ``data_table``.

     The nested table starts with a row holding the code, the unit and the
     long description (a four-column-wide cell), followed by a header row, a
     rule, and whatever ``self.components`` writes into it. It is then added
     to ``data_table`` as a single-cell row.

     :param data_table: table object receiving the nested table via ``add_row``
     """
     nested_data_table = pylatex.Tabular('l r l p{4cm} r r')
     code_cell = pylatex_utils.ascii2latex(self.Codigo())
     unit_cell = pylatex_utils.ascii2latex(self.Unidad())
     description_cell = pylatex.table.MultiColumn(
         4,
         align=pylatex.utils.NoEscape('p{7cm}'),
         data=pylatex_utils.ascii2latex(self.getLongDescription()))
     nested_data_table.add_row([code_cell, unit_cell, description_cell])
     # Header
     nested_data_table.add_row([
         u'Código', u'Rdto.', u'Ud.', u'Descripción', u'Unit.', u'Total'
     ])
     nested_data_table.add_hline()
     # Decomposition
     self.components.writePriceJustification(nested_data_table, True)
     # XXX Here cumulated percentages.
     data_table.add_row([nested_data_table])
Ejemplo n.º 17
0
def table() -> pylatex.Table:
    """
    Build the camera-properties table from the as-measured optics.

    One row is emitted for every ``[i, j]`` element of ``detector.gain``,
    showing gain, read noise and dark current (three decimals). The channel
    name and serial number appear only on the first row (``j == 0``) of each
    ``i`` group, and a rule follows every group.

    :return: a ``pylatex.Table`` with the centered tabular, caption and label
    """
    optics_all = esis.flight.optics.as_measured()
    detector = optics_all.detector

    titles = [r'Channel', r'Quad.', r'Gain', r'Read noise', r'Dark current']
    units = [
        '',
        '',
        f'({detector.gain.unit:latex_inline})',
        f'({detector.readout_noise.unit:latex_inline})',
        f'({detector.dark_current.unit:latex_inline})',
    ]

    result = pylatex.Table()
    with result.create(pylatex.Center()) as centering:
        with centering.create(pylatex.Tabular('ccccc')) as tabular:
            tabular.escape = False
            tabular.add_row(titles)
            tabular.add_row(units)
            tabular.add_hline()
            for i in range(detector.gain.shape[0]):
                for j in range(detector.gain.shape[1]):
                    if j == 0:
                        channel_cell = (f'{optics_all.channel_name[i]} '
                                        f'({detector.serial_number[i]})')
                    else:
                        # Later rows of a group keep the first cell blank
                        # (a single space, as before).
                        channel_cell = ' '
                    dark_current = detector.dark_current[i, j].value
                    tabular.add_row([
                        channel_cell,
                        j + 1,
                        detector.gain[i, j].value,
                        detector.readout_noise[i, j].value,
                        f'{dark_current:0.3f}',
                    ])
                tabular.add_hline()
    result.add_caption(pylatex.NoEscape(r'\ESIS\ camera properties'))
    result.append(kgpy.latex.Label('tabel:cameraProperties'))
    return result
Ejemplo n.º 18
0
 def statistics(self, leak):
     """
     Append the "Generic Test Result" summary for ``leak`` to ``self.doc``.

     Nothing is written when ``leak.status.max_leak()`` returns ``None``.
     Otherwise a bold title and a one-row, five-column table are appended.
     The "Source" cell is ``H_<nstype>`` when the maximum leak is an
     ``NSLeak`` and ``M_<sptype>`` otherwise.

     :param leak: object whose ``status.max_leak()`` supplies the reported leak
     """
     max_leak = leak.status.max_leak()
     if max_leak is None:
         return

     report = self.doc
     report.append(NewLine())
     report.append(pylatex.utils.bold("Generic Test Result:"))
     report.append(NewLine())

     if isinstance(max_leak, NSLeak):
         source = f"H_{max_leak.nstype}"
     else:
         source = f"M_{max_leak.sptype}"

     with report.create(pylatex.Tabular("l l l l l")) as data_table:
         heading = ["Source", "Kuiper", "Significance", "Confidence", "Key"]
         data_table.add_row(heading)
         data_table.add_hline()
         values = [
             source,
             f"{max_leak.teststat:.3f}",
             f"{max_leak.limit:.3f}",
             f"{max_leak.confidence}",
             str(max_leak.key),
         ]
         data_table.add_row(values)
     report.append(NewLine())
Ejemplo n.º 19
0
def table_old() -> pylatex.Table:
    """
    Build the figure / surface-roughness requirements table.

    The rows are grouped (primary slope error, primary roughness, grating
    slope error, grating roughness); a rule is drawn after the heading and
    after every group. The "Measured" column is empty in every row.

    :return: a ``pylatex.Table`` with the centered tabular, caption and the
        label ``table:error``
    """
    heading = [r'Element', r'Parameter', r'Requirement', r'Measured']
    row_groups = (
        (
            [r'Primary', r'RMS slope error ($\mu$rad)', r'$<1.0$', r''],
            [r'', r'Integration length (mm)', r'4.0', r''],
            [r'', r'Sample length (mm)', r'2.0', r''],
        ),
        (
            [r'Primary', r'RMS roughness (nm)', r'$<2.5$', r''],
            [r'', r'Periods (mm)', r'0.1-6', r''],
        ),
        (
            [r'Grating', r'RMS slope error ($\mu$rad)', r'$<3.0$', r''],
            [r'', r'Integration length (mm)', r'2 \roy{why fewer sigfigs?}',
             r''],
            [r'', r'Sample length (mm)', r'1', r''],
        ),
        (
            [r'Grating', r'RMS roughness (nm)', r'$<2.3$', r''],
            [r'', r'Periods (mm)', r'0.02-2', r''],
        ),
    )
    # Spelled out line by line so the leading newline and the trailing space
    # before the second line break are explicit.
    caption = (
        "\n"
        r"Figure and surface roughness requirements compared to metrology for the \ESIS\ optics."
        "\n"
        r"Slope error (both the numerical estimates and the measurements) is worked out with integration length and sample length "
        "\n"
        r"defined per ISO 10110."
    )

    result = pylatex.Table()
    result._star_latex_name = True
    with result.create(pylatex.Center()) as centering:
        with centering.create(pylatex.Tabular('llrr')) as tabular:
            tabular.escape = False
            tabular.add_row(heading)
            tabular.add_hline()
            for group in row_groups:
                for cells in group:
                    tabular.add_row(cells)
                tabular.add_hline()
    result.add_caption(pylatex.NoEscape(caption))
    result.append(kgpy.latex.Label('table:error'))
    return result
Ejemplo n.º 20
0
def _add_keyboard_notes(document: pylatex.Document, texts: dict,
                        images: dict) -> None:
    """
    Append the keyboard notes subsection to ``document``.

    The subsection consists of an unnumbered, unlabelled heading, the
    ``text0`` paragraph, a 5mm vertical space, a three-column table with one
    row per zone (0, 1, 2) holding the zone image, its sound description and
    its speaker description, and finally the ``text1`` and ``text2``
    paragraphs.

    :param document: document the content is appended to
    :param texts: mapping whose ``"keyboard"`` entry provides ``title``,
        ``text0``..``text2`` and ``zone<i>sound`` / ``zone<i>speaker``
    :param images: mapping providing ``zone_<i>`` for each zone
    """
    heading = pylatex.Subsection(
        title=texts["keyboard"]["title"],
        label=False,
        numbering=False,
    )
    document.append(heading)

    document.append(texts["keyboard"]["text0"])
    document.append(pylatex.NoEscape(r"\vspace{5mm}"))

    table = pylatex.Tabular(r" l | l | p{9cm} ")

    column_titles = (
        "Bereich",
        "Beschreibung der Klänge",
        "verwendete Lautsprecher",
    )
    table.add_row(*[pylatex.MediumText(title) for title in column_titles])
    for zone_idx in range(3):
        # A rule precedes every zone row; none follows the last one.
        table.add_hline()
        zone_image = _make_img(images[f"zone_{zone_idx}"],
                               width=0.22,
                               add_figure=False)
        sound_text = texts["keyboard"][f"zone{zone_idx}sound"]
        speaker_text = texts["keyboard"][f"zone{zone_idx}speaker"]
        table.add_row(zone_image, sound_text, speaker_text)

    document.append(pylatex.Table(data=table, position="h!"))
    for closing_key in ("text1", "text2"):
        document.append(texts["keyboard"][closing_key])
Ejemplo n.º 21
0
def gen_table(df, doc):
    '''
    Build the six-column calendar table through ``doc``.

    inputs:
    ------
    - df: DataFrame; one table row is emitted per entry of ``df.index``
    - doc: document object whose ``create`` method is used as a context
      manager to open the tabular
    returns:
    - None (the table is produced as a side effect)
    '''
    layout = pl.Tabular('l | c c c c c', col_space='0.15cm', pos=['h'])
    with doc.create(layout) as table:
        table.add_hline()
        table.add_row(
            ["Time", "Country", "Indicator", "Period", "Forecast", "Impact"],
            mapper=[bold],
            color='white')
        table.add_hline()
    # Even positions (0, 2, ...) are lightgray, odd positions are white.
    for position, label in enumerate(df.index):
        shade = 'lightgray' if position % 2 == 0 else 'white'
        cells = _fill_row(label, df)
        table.add_row(cells, color=shade)
    table.add_hline()
Ejemplo n.º 22
0
def gen_table(df, doc):
    '''
    Build the five-column contracts table through ``doc``.

    inputs:
    ------
    - df: DataFrame; one table row is emitted per entry of ``df.index``
    - doc: document object whose ``create`` method is used as a context
      manager to open the tabular
    returns:
    - None (the table is produced as a side effect)
    '''
    header_lines = (
        ["Contracts", "D-1", "D-Zero", "Ch. Daily", "Vol."],
        ['', '(k)', '(k)', '(k)', '(k)'],
    )
    # Row backgrounds cycle through this pair, starting with lightgray.
    stripes = ('lightgray', 'white')
    layout = pl.Tabular('l | c c c c', col_space='0.15cm', pos=['h'])
    with doc.create(layout) as table:
        table.add_hline()
        for heading in header_lines:
            table.add_row(heading, mapper=[bold], color='white')
        table.add_hline()
    for position, label in enumerate(df.index):
        cells = _fill_row(label, df)
        table.add_row(cells, color=stripes[position % 2])
    table.add_hline()
Ejemplo n.º 23
0
def generate_roga(seq_list, genus, lab, source, work_dir, amendment_flag,
                  amended_id):
    """
    Generate a Report of Genomic Analysis (ROGA) PDF for a set of samples.

    The function pulls combined metadata and GDCS results for the requested
    Seq IDs, registers the report through ``update_db`` to obtain a report
    ID, runs a genus-specific validation pass (printing a warning to stdout
    for every E. coli sample lacking uidA or vt), and then assembles a
    pylatex document containing: the report header, an identification
    summary, a genus-specific GeneSeekr table, a sequence quality table, a
    pipeline metadata table and a 'Verified by' form field.

    :param seq_list: List of OLC Seq IDs
    :param genus: Expected Genus for samples (Salmonella, Listeria, or Escherichia).
        This value selects the validation routine and GeneSeekr table and is
        used in the report title and output file name.
    :param lab: ID for lab report is being generated for (also the key used to look up ``lab_info``)
    :param source: string input for source that strains were derived from, i.e. 'ground beef'
    :param work_dir: bio_request directory; the PDF is written into it
    :param amendment_flag: determined if the report is an amendment type or not (True/False)
    :param amended_id: ID of the original report that the new report is amending
    :return: path ``<work_dir>/<report_id>_<genus>_<date>.pdf``. The path is
        returned even when LaTeX compilation raised an error (a warning is
        printed in that case), so callers should not assume the file exists.
    """

    # RETRIEVE DATAFRAMES FOR EACH SEQID
    metadata_reports = extract_report_data.get_combined_metadata(seq_list)
    gdcs_reports = extract_report_data.get_gdcs(seq_list)
    gdcs_dict = extract_report_data.generate_gdcs_dict(gdcs_reports)

    # DATE SETUP
    # Read the clock once so that date and year can never disagree.
    today = datetime.today()
    date = today.strftime('%Y-%m-%d')
    year = today.strftime('%Y')

    # PAGE SETUP
    geometry_options = {
        "tmargin": "2cm",
        "lmargin": "1cm",
        "rmargin": "1cm",
        "headsep": "1cm"
    }

    doc = pl.Document(page_numbers=False, geometry_options=geometry_options)

    header = produce_header_footer()
    doc.preamble.append(header)
    doc.change_document_style("header")

    # DATABASE HANDLING
    report_id = update_db(date=date,
                          year=year,
                          genus=genus,
                          lab=lab,
                          source=source,
                          amendment_flag=amendment_flag,
                          amended_id=amended_id)

    # MARKER VARIABLES SETUP
    all_uida = False
    all_vt = False
    all_mono = False
    all_enterica = False

    # SECOND VALIDATION SCREEN
    # Each "all_*" flag is True only when no sample reported an explicit False.
    if genus == 'Escherichia':
        validated_ecoli_dict = extract_report_data.validate_ecoli(
            seq_list, metadata_reports)
        vt_list = []
        uida_list = []

        for key, value in validated_ecoli_dict.items():
            # value[0] -> uidA detected, value[1] -> vt probes detected
            ecoli_uida_present = value[0]
            ecoli_vt_present = value[1]

            uida_list.append(ecoli_uida_present)
            vt_list.append(ecoli_vt_present)

            if not ecoli_uida_present:
                print(
                    'WARNING: uidA not present for {}. Cannot confirm E. coli.'
                    .format(key))
            if not ecoli_vt_present:
                print('WARNING: vt probe sequences not detected for {}. '
                      'Cannot confirm strain is verotoxigenic.'.format(key))

        all_uida = False not in uida_list
        all_vt = False not in vt_list

    elif genus == 'Listeria':
        validated_listeria_dict = extract_report_data.validate_listeria(
            seq_list, metadata_reports)
        all_mono = False not in list(validated_listeria_dict.values())

    elif genus == 'Salmonella':
        validated_salmonella_dict = extract_report_data.validate_salmonella(
            seq_list, metadata_reports)
        all_enterica = False not in list(validated_salmonella_dict.values())

    # Footnote text shared by every marker (+/-) table.
    marker_legend = ("+ indicates marker presence : "
                     "- indicates marker was not detected")

    # MAIN DOCUMENT BODY
    with doc.create(
            pl.Section('Report of Genomic Analysis: ' + genus,
                       numbering=False)):

        # REPORT ID AND AMENDMENT CHECKING
        doc.append(bold('Report ID: '))
        doc.append(report_id)
        if amendment_flag:
            doc.append(italic(' (This report is an amended version of '))
            doc.append(amended_id)
            doc.append(italic(')'))
        doc.append(bold('\nReporting laboratory: '))
        doc.append(lab)
        doc.append('\n\n')

        # LAB SUMMARY
        with doc.create(pl.Tabular('lcr', booktabs=True)) as table:
            table.add_row(bold('Laboratory'), bold('Address'), bold('Tel #'))
            table.add_row(lab, lab_info[lab][0], lab_info[lab][1])

        # AMENDMENT FIELD (amended reports only)
        if amendment_flag:
            with doc.create(
                    pl.Subsubsection('Reason for amendment:',
                                     numbering=False)):
                with doc.create(Form()):
                    doc.append(pl.Command('noindent'))
                    doc.append(
                        pl.Command('TextField',
                                   options=[
                                       "name=amendmentbox", "multiline=true",
                                       pl.NoEscape("bordercolor=0 0 0"),
                                       pl.NoEscape("width=7in"),
                                       "height=0.43in"
                                   ],
                                   arguments=''))

        # TEXT SUMMARY
        with doc.create(
                pl.Subsection('Identification Summary',
                              numbering=False)) as summary:

            summary.append('Whole-genome sequencing analysis was conducted on '
                           '{} '.format(len(metadata_reports)))
            summary.append(italic('{} '.format(genus)))

            strain_noun = 'strain' if len(metadata_reports) == 1 else 'strains'
            summary.append('{} isolated from "{}". '.format(
                strain_noun, source.lower()))

            if genus == 'Escherichia':
                if all_uida:
                    summary.append('The following strains are confirmed as ')
                    summary.append(italic('Escherichia coli '))
                    summary.append(
                        'based on 16S sequence and the presence of marker gene '
                    )
                    summary.append(italic('uidA. '))
                else:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Escherichia coli '))
                    summary.append('as the ')
                    summary.append(italic('uidA '))
                    summary.append('marker gene was not detected. ')

                if all_vt:
                    summary.append(
                        'All strain(s) are confirmed to be VTEC based on detection of probe sequences '
                        'indicating the presence of verotoxin genes.')

            elif genus == 'Listeria':
                if all_mono:
                    summary.append(
                        'The following strains are confirmed to be ')
                    summary.append(italic('Listeria monocytogenes '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Listeria monocytogenes.'))

            elif genus == 'Salmonella':
                if all_enterica:
                    summary.append(
                        'The following strains are confirmed to be ')
                    summary.append(italic('Salmonella enterica '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Salmonella enterica.'))

        # GENUS-SPECIFIC GENESEEKR TABLE
        # The `genus` argument is never rebound below: it must stay the
        # requested genus so that table selection and the output file name
        # agree with the report title.

        # ESCHERICHIA TABLE
        if genus == 'Escherichia':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'uidA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'Serotype')),
                bold(pl.NoEscape(r'Verotoxin Profile')),
                bold(pl.NoEscape(r'eae{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(
                        pl.Tabular(table_spec='|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        # Select this sample's record once; every cell
                        # below is the first matching value of a column.
                        record = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = record['SampleName'].values[0]

                        # Serotype, with the bracketed % identity removed
                        serotype = record['E_coli_Serotype'].values[0]
                        fixed_serotype = remove_bracketed_values(serotype)

                        # Verotoxin
                        verotoxin = record['Vtyper_Profile'].values[0]

                        # MLST/rMLST ('-' is reported as 'New')
                        mlst = str(record['MLST_Result'].values[0]).replace(
                            '-', 'New')
                        rmlst = str(record['rMLST_Result'].values[0]).replace(
                            '-', 'New')

                        # Markers
                        marker_list = record['GeneSeekr_Profile'].values[0]
                        uida = '+' if 'uidA' in marker_list else '-'
                        eae = '+' if 'eae' in marker_list else '-'

                        table.add_row((lsts_id, uida, fixed_serotype,
                                       verotoxin, eae, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a', marker_legend)

        # LISTERIA TABLE
        elif genus == 'Listeria':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'IGS{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'hlyA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'inlJ{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        record = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = record['SampleName'].values[0]

                        # MLST/rMLST ('-' is reported as 'New')
                        mlst = str(record['MLST_Result'].values[0]).replace(
                            '-', 'New')
                        rmlst = str(record['rMLST_Result'].values[0]).replace(
                            '-', 'New')

                        # Markers
                        marker_list = record['GeneSeekr_Profile'].values[0]
                        igs = '+' if 'IGS' in marker_list else '-'
                        hlya = '+' if 'hlyA' in marker_list else '-'
                        inlj = '+' if 'inlJ' in marker_list else '-'

                        table.add_row((lsts_id, igs, hlya, inlj, mlst, rmlst))
                    table.add_hline()

                create_caption(genesippr_section, 'a', marker_legend)

        # SALMONELLA TABLE
        elif genus == 'Salmonella':
            genesippr_table_columns = (
                bold('ID'),
                bold(
                    pl.NoEscape(
                        r'Serovar{\footnotesize \textsuperscript {a}}')),
                bold(
                    pl.NoEscape(
                        r'Serogroup{\footnotesize \textsuperscript {a,b}}')),
                bold(pl.NoEscape(r'H1{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'H2{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'invA{\footnotesize \textsuperscript {b}}')),
                bold(pl.NoEscape(r'stn{\footnotesize \textsuperscript {b}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        record = df.loc[df['SeqID'] == sample_id]

                        # ID
                        lsts_id = record['SampleName'].values[0]

                        # MLST/rMLST ('-' is reported as 'New')
                        mlst = str(record['MLST_Result'].values[0]).replace(
                            '-', 'New')
                        rmlst = str(record['rMLST_Result'].values[0]).replace(
                            '-', 'New')

                        # Serovar
                        serovar = record['SISTR_serovar'].values[0]

                        # SISTR Serogroup, H1, H2 (edge ';' stripped from H1/H2)
                        sistr_serogroup = record['SISTR_serogroup'].values[0]
                        sistr_h1 = record['SISTR_h1'].values[0].strip(';')
                        sistr_h2 = record['SISTR_h2'].values[0].strip(';')

                        # Markers
                        marker_list = record['GeneSeekr_Profile'].values[0]
                        inva = '+' if 'invA' in marker_list else '-'
                        stn = '+' if 'stn' in marker_list else '-'

                        table.add_row(
                            (lsts_id, serovar, sistr_serogroup, sistr_h1,
                             sistr_h2, inva, stn, mlst, rmlst))
                    table.add_hline()

                create_caption(
                    genesippr_section, 'a',
                    "Predictions conducted using SISTR "
                    "(Salmonella In Silico Typing Resource)")
                create_caption(genesippr_section, 'b', marker_legend)

        # SEQUENCE TABLE
        sequence_quality_columns = (
            bold('ID'),
            bold(pl.NoEscape(r'Total Length')),
            bold(pl.NoEscape(r'Coverage')),
            bold(pl.NoEscape(r'GDCS')),
            bold(pl.NoEscape(r'Pass/Fail')),
        )

        with doc.create(
                pl.Subsection('Sequence Quality Metrics', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(sequence_quality_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()

                    record = df.loc[df['SeqID'] == sample_id]

                    # Grab values
                    lsts_id = record['SampleName'].values[0]
                    total_length = record['TotalLength'].values[0]
                    average_coverage_depth = record[
                        'AverageCoverageDepth'].values[0]

                    # Fix coverage: drop any 'X', round to a whole number,
                    # then put the 'X' suffix back.
                    average_coverage_depth = format(
                        float(str(average_coverage_depth).replace('X', '')),
                        '.0f') + 'X'

                    # GDCS entry: [0] is the matches value, [1] the +/- flag
                    matches = gdcs_dict[sample_id][0]
                    passfail = gdcs_dict[sample_id][1]
                    if passfail == '+':
                        passfail = 'Pass'
                    elif passfail == '-':
                        passfail = 'Fail'

                    # Add row
                    table.add_row((lsts_id, total_length,
                                   average_coverage_depth, matches, passfail))
                table.add_hline()

        # PIPELINE METADATA TABLE
        pipeline_metadata_columns = (bold('ID'), bold('Seq ID'),
                                     bold('Pipeline Version'),
                                     bold('Database Version'))

        with doc.create(pl.Subsection('Pipeline Metadata', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(pipeline_metadata_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()

                    record = df.loc[df['SeqID'] == sample_id]

                    # ID
                    lsts_id = record['SampleName'].values[0]

                    # Pipeline version; the database version column is
                    # filled with the same value.
                    pipeline_version = record['PipelineVersion'].values[0]
                    database_version = pipeline_version

                    # Add row
                    table.add_row((lsts_id, sample_id, pipeline_version,
                                   database_version))

                table.add_hline()

        # 'VERIFIED BY' FIELD
        with doc.create(pl.Subsubsection('Verified by:', numbering=False)):
            with doc.create(Form()):
                doc.append(pl.Command('noindent'))
                doc.append(
                    pl.Command('TextField',
                               options=[
                                   "name=verifiedbybox", "multiline=false",
                                   pl.NoEscape("bordercolor=0 0 0"),
                                   pl.NoEscape("width=2.5in"), "height=0.3in"
                               ],
                               arguments=''))

    # OUTPUT PDF FILE
    pdf_file = os.path.join(work_dir,
                            '{}_{}_{}'.format(report_id, genus, date))

    # PDF generation stays best-effort (a compiler error does not abort the
    # caller), but the failure is reported instead of being silently
    # discarded, and KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        doc.generate_pdf(pdf_file, clean_tex=False)
    except Exception as exc:
        print('WARNING: PDF generation reported an error for {}: {}'.format(
            pdf_file, exc))

    pdf_file += '.pdf'
    return pdf_file
Ejemplo n.º 24
0
    def tabulateVariables(self):
        """
        Append a floating table titled "Input variables" to ``self.doc``.

        One row is written per kernel in ``self.best1d`` plus one italic
        'Baseline' row for ``self.constker``. Each regular row lists the
        1-based dimension, the variable label, its min / max / mean taken
        from the data shape, and the kernel's short description, NLML and
        error (as a percentage). The best (lowest) rounded NLML and error
        values are set in bold.
        """
        kernels = self.best1d[:]
        kernels.append(self.constker)
        # Two stable sorts in sequence: final order is by rounded error,
        # ties keep their rounded-NLML order.
        kernels.sort(key=lambda ker: round(ker.getNLML(), 2))
        kernels.sort(key=lambda ker: round(ker.error(), 4))
        data = kernels[0].data
        shape = data.getDataShape()

        best_nlml = round(min(ker.getNLML() for ker in kernels), 2)
        best_error = round(min(ker.error() for ker in kernels), 4)

        def centred(text, span=1):
            # Centre-aligned header cell spanning `span` columns.
            return pl.MultiColumn(span, align='c', data=text)

        doc = self.doc
        with doc.create(pl.Table(position='htbp!')) as tab:
            tab.add_caption(ut.NoEscape("Input variables"))

            t = pl.Tabular('rlrrrcrr')

            # Header: a grouping line over columns 3-8, then column titles
            t.add_hline()
            t.add_row(('', '', centred('Statistics', 3),
                       centred('Classifier Performance', 3)))
            t.add_hline(start=3, end=8)
            column_titles = ('Dimension', 'Variable', 'Min', 'Max', 'Mean',
                             'Kernel', 'NLML', 'Error')
            t.add_row(tuple(centred(title) for title in column_titles))
            t.add_hline()

            # Entries
            for ker in kernels:
                if ker is self.constker:
                    # Baseline row: placeholders, everything in italics
                    cells = ('--', 'Baseline', '--', '--', '--',
                             ker.shortInterp(),
                             '{0:.2f}'.format(ker.getNLML()),
                             r'{0:.2f}\%'.format(ker.error() * 100))
                    row = [ut.italic(cell, escape=False) for cell in cells]
                else:
                    dim = ker.getActiveDims()[0]
                    row = [dim + 1, data.XLabel[dim]]
                    row += ['{0:.2f}'.format(shape[stat][dim])
                            for stat in ('x_min', 'x_max', 'x_mu')]
                    row += [
                        ker.shortInterp(),
                        ut.NoEscape('{0:.2f}'.format(ker.getNLML())),
                        ut.NoEscape(r'{0:.2f}\%'.format(ker.error() * 100)),
                    ]

                # Highlight the best NLML (column 6) and error (column 7)
                if round(ker.getNLML(), 2) == best_nlml:
                    row[6] = ut.bold(row[6])
                if round(ker.error(), 4) == best_error:
                    row[7] = ut.bold(row[7])

                t.add_row(tuple(row))

            t.add_hline()

            tab.append(ut.NoEscape(r'\centering'))
            tab.append(t)
Ejemplo n.º 25
0
def generate_roga(seq_lsts_dict, genus, lab, source, work_dir, amendment_flag,
                  amended_id):
    """
    Generates PDF
    :param seq_lsts_dict: Dict of SeqIDs;LSTSIDs
    :param genus: Expected Genus for samples (Salmonella, Listeria, Escherichia, or Vibrio)
    :param lab: ID for lab report is being generated for
    :param source: string input for source that strains were derived from, i.e. 'ground beef'
    :param work_dir: bio_request directory
    :param amendment_flag: determined if the report is an amendment type or not (True/False)
    :param amended_id: ID of the original report that the new report is amending
    """

    # RETRIEVE DATAFRAMES FOR EACH SEQID
    seq_list = list(seq_lsts_dict.keys())

    metadata_reports = extract_report_data.get_combined_metadata(seq_list)
    gdcs_reports = extract_report_data.get_gdcs(seq_list)
    gdcs_dict = extract_report_data.generate_gdcs_dict(gdcs_reports)

    # Create our idiot proofing list. There are a bunch of things that can go wrong that should make us not send
    # out reports. As we go through data retrieval/report generation, add things that are wrong to the list, and users
    # will get a message saying what's wrong, no report will be generated unless user adds the FORCE flag.
    idiot_proofing_list = list()
    # DATE SETUP
    date = datetime.today().strftime('%Y-%m-%d')
    year = datetime.today().strftime('%Y')
    # Follow our fiscal year - anything before April is actually previous year.
    if datetime.now().month < 4:
        year = int(year) - 1

    # PAGE SETUP
    geometry_options = {
        "tmargin": "2cm",
        "lmargin": "1cm",
        "rmargin": "1cm",
        "headsep": "1cm"
    }

    doc = pl.Document(page_numbers=False, geometry_options=geometry_options)

    header = produce_header_footer()
    doc.preamble.append(header)
    doc.change_document_style("header")

    # DATABASE HANDLING
    report_id = update_db(date=date,
                          year=year,
                          genus=genus,
                          lab=lab,
                          source=source,
                          amendment_flag=amendment_flag,
                          amended_id=amended_id)

    # MARKER VARIABLES SETUP
    all_uida = False
    all_vt = False
    all_mono = False
    all_enterica = False
    all_vibrio = False
    some_vt = False
    vt_sample_list = []

    # SECOND VALIDATION SCREEN
    if genus == 'Escherichia':
        validated_ecoli_dict = extract_report_data.validate_ecoli(
            seq_list, metadata_reports)
        vt_list = []
        uida_list = []
        hlya_list = []

        for key, value in validated_ecoli_dict.items():
            ecoli_uida_present = validated_ecoli_dict[key][0]
            ecoli_vt_present = validated_ecoli_dict[key][1]
            ecoli_hlya_present = validated_ecoli_dict[key][2]

            hlya_list.append(ecoli_hlya_present)
            uida_list.append(ecoli_uida_present)
            vt_list.append(ecoli_vt_present)

            # For the AMR table so only vt+ samples are shown
            if ecoli_vt_present is True:
                vt_sample_list.append(key)

            if not ecoli_uida_present:
                print(
                    'WARNING: uidA not present for {}. Cannot confirm E. coli.'
                    .format(key))
                idiot_proofing_list.append(
                    'uidA not present in {}. Cannot confirm E. coli'.format(
                        key))
            if not ecoli_vt_present:
                print('WARNING: vt probe sequences not detected for {}. '
                      'Cannot confirm strain is verotoxigenic.'.format(key))
                idiot_proofing_list.append(
                    'VTX not present in {}. Cannot confirm strain is verotoxigenic'
                    .format(key))

        if False not in uida_list:
            all_uida = True
        if False not in vt_list:
            all_vt = True

        if True in vt_list:
            some_vt = True

    elif genus == 'Listeria':
        validated_listeria_dict = extract_report_data.validate_listeria(
            seq_list, metadata_reports)
        mono_list = []
        for key, value in validated_listeria_dict.items():
            mono_list.append(value)
            if value is False:
                idiot_proofing_list.append(
                    'Could not confirm {} as L. monocytogenes'.format(key))
        if False not in mono_list:
            all_mono = True

    elif genus == 'Salmonella':
        validated_salmonella_dict = extract_report_data.validate_salmonella(
            seq_list, metadata_reports)
        enterica_list = []
        for key, value in validated_salmonella_dict.items():
            enterica_list.append(value)
            if value is False:
                idiot_proofing_list.append(
                    'Could not confirm {} as S. enterica'.format(key))
        if False not in enterica_list:
            all_enterica = True

    elif genus == 'Vibrio':
        validated_vibrio_dict = extract_report_data.validate_vibrio(
            seq_list, metadata_reports)
        vibrio_list = list()
        for key, value in validated_vibrio_dict.items():
            vibrio_list.append(value)
            if value is False:
                idiot_proofing_list.append(
                    'Could not confirm {} as Vibrio'.format(key))
        if False not in vibrio_list:
            all_vibrio = True

    # MAIN DOCUMENT BODY
    with doc.create(
            pl.Section('Report of Genomic Analysis: ' + genus,
                       numbering=False)):

        # REPORT ID AND AMENDMENT CHECKING
        if amendment_flag:
            doc.append(bold('Report ID: '))
            doc.append(report_id)
            doc.append(italic(' (This report is an amended version of '))
            doc.append(amended_id)
            doc.append(italic(')'))
            doc.append('\n')
            doc.append(
                pl.Command('TextField',
                           options=[
                               "name=rdimsnumberbox", "multiline=false",
                               pl.NoEscape("bordercolor=0 0 0"),
                               pl.NoEscape("width=1.1in"), "height=0.2in"
                           ],
                           arguments=bold('RDIMS ID: ')))
            doc.append(bold('\nReporting laboratory: '))
            doc.append(lab)
            doc.append('\n\n')

            # LAB SUMMARY
            with doc.create(pl.Tabular('lcr', booktabs=True)) as table:
                table.add_row(bold('Laboratory'), bold('Address'),
                              bold('Tel #'))
                table.add_row(lab, lab_info[lab][0], lab_info[lab][1])

            # AMENDMENT FIELD
            with doc.create(
                    pl.Subsubsection('Reason for amendment:',
                                     numbering=False)):
                with doc.create(Form()):
                    doc.append(pl.Command('noindent'))
                    doc.append(
                        pl.Command('TextField',
                                   options=[
                                       "name=amendmentbox", "multiline=true",
                                       pl.NoEscape("bordercolor=0 0 0"),
                                       pl.NoEscape("width=7in"),
                                       "height=0.43in"
                                   ],
                                   arguments=''))
        else:
            doc.append(bold('Report ID: '))
            doc.append(report_id)
            doc.append('\n')
            doc.append(
                pl.Command('TextField',
                           options=[
                               "name=rdimsnumberbox", "multiline=false",
                               pl.NoEscape("bordercolor=0 0 0"),
                               pl.NoEscape("width=1.1in"), "height=0.2in"
                           ],
                           arguments=bold('RDIMS ID: ')))
            doc.append(bold('\nReporting laboratory: '))
            doc.append(lab)
            doc.append('\n\n')

            # LAB SUMMARY
            with doc.create(pl.Tabular('lcr', booktabs=True)) as table:
                table.add_row(bold('Laboratory'), bold('Address'),
                              bold('Tel #'))
                table.add_row(lab, lab_info[lab][0], lab_info[lab][1])

        # TEXT SUMMARY
        with doc.create(
                pl.Subsection('Identification Summary',
                              numbering=False)) as summary:

            summary.append('Whole-genome sequencing analysis was conducted on '
                           '{} '.format(len(metadata_reports)))
            summary.append(italic('{} '.format(genus)))

            if len(metadata_reports) == 1:
                summary.append('strain isolated from "{}". '.format(
                    source.lower()))
            else:
                summary.append('strains isolated from "{}". '.format(
                    source.lower()))

            if genus == 'Escherichia':
                if all_uida:
                    summary.append('The following strains are confirmed as ')
                    summary.append(italic('Escherichia coli '))
                    summary.append(
                        'based on 16S sequence and the presence of marker gene '
                    )
                    summary.append(italic('uidA. '))
                elif not all_uida:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Escherichia coli '))
                    summary.append('as the ')
                    summary.append(italic('uidA '))
                    summary.append('marker gene was not detected. ')

                if all_vt:
                    summary.append(
                        'All strain(s) are confirmed to be VTEC based on detection of probe sequences '
                        'indicating the presence of verotoxin genes.')

            elif genus == 'Listeria':
                if all_mono:
                    summary.append(
                        'The following strains are confirmed to be ')
                    summary.append(italic('Listeria monocytogenes '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Listeria monocytogenes.'))

            elif genus == 'Salmonella':
                if all_enterica:
                    summary.append(
                        'The following strains are confirmed to be ')
                    summary.append(italic('Salmonella enterica '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Salmonella enterica.'))

            elif genus == 'Vibrio':
                if all_vibrio:
                    summary.append(
                        'The following strains are confirmed to be ')
                    summary.append(italic('Vibrio parahaemolyticus '))
                    summary.append('based on GeneSeekr analysis: ')
                else:
                    summary.append(
                        'Some of the following strains could not be confirmed to be '
                    )
                    summary.append(italic('Vibrio parahaemolyticus.'))

        # VIBRIO TABLE
        if genus == 'Vibrio':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'R72H{\footnotesize \textsuperscript {a}}')),
                bold(
                    pl.NoEscape(r'groEL{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'Virulence Profile')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        # ID
                        # lsts_id = df.loc[df['SeqID'] == sample_id]['SampleName'].values[0]
                        lsts_id = seq_lsts_dict[sample_id]

                        # Genus
                        genus = df.loc[df['SeqID'] ==
                                       sample_id]['Genus'].values[0]

                        # MLST/rMLST
                        mlst = str(df.loc[df['SeqID'] == sample_id]
                                   ['MLST_Result'].values[0]).replace(
                                       '-', 'New')
                        rmlst = str(df.loc[df['SeqID'] == sample_id]
                                    ['rMLST_Result'].values[0]).replace(
                                        '-', 'New')

                        # Markers
                        marker_list = df.loc[df['SeqID'] == sample_id][
                            'GeneSeekr_Profile'].values[0]
                        (r72h, groel) = '-', '-'
                        if 'r72h' in marker_list:
                            r72h = '+'
                        if 'groEL' in marker_list:
                            groel = '+'

                        # Virulence
                        virulence = ''
                        if 'tdh' in marker_list:
                            virulence += 'tdh;'
                        if 'trh' in marker_list:
                            virulence += 'trh;'
                        if ';' in virulence:
                            virulence = virulence[:-1]
                        if virulence == '':
                            virulence = '-'

                        table.add_row(
                            (lsts_id, r72h, groel, virulence, mlst, rmlst))
                    table.add_hline()
                create_caption(
                    genesippr_section, 'a', "+ indicates marker presence : "
                    "- indicates marker was not detected")

        # ESCHERICHIA TABLE
        if genus == 'Escherichia':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'uidA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'Serotype')),
                bold(pl.NoEscape(r'Verotoxin(s)')),
                bold(pl.NoEscape(r'hlyA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'eae{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'aggR{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        # ID
                        # lsts_id = df.loc[df['SeqID'] == sample_id]['SampleName'].values[0]
                        lsts_id = seq_lsts_dict[sample_id]

                        # Genus (pulled from 16S)
                        genus = df.loc[df['SeqID'] ==
                                       sample_id]['Genus'].values[0]

                        # Serotype
                        serotype = df.loc[df['SeqID'] == sample_id][
                            'E_coli_Serotype'].values[0]

                        # Remove % identity
                        fixed_serotype = remove_bracketed_values(serotype)

                        # Verotoxin
                        verotoxin = df.loc[df['SeqID'] == sample_id][
                            'Vtyper_Profile'].values[0]

                        # MLST/rMLST
                        mlst = str(df.loc[df['SeqID'] == sample_id]
                                   ['MLST_Result'].values[0]).replace(
                                       '-', 'New')
                        rmlst = str(df.loc[df['SeqID'] == sample_id]
                                    ['rMLST_Result'].values[0]).replace(
                                        '-', 'New')

                        marker_list = df.loc[df['SeqID'] == sample_id][
                            'GeneSeekr_Profile'].values[0]

                        (uida, eae, hlya, aggr) = '-', '-', '-', '-'
                        if 'uidA' in marker_list:
                            uida = '+'
                        if 'eae' in marker_list:
                            eae = '+'
                        if 'hlyA' in marker_list:
                            hlya = '+'
                        if 'aggR' in marker_list:
                            aggr = '+'

                        table.add_row(
                            (lsts_id, uida, fixed_serotype, verotoxin, hlya,
                             eae, aggr, mlst, rmlst))
                    table.add_hline()

                create_caption(
                    genesippr_section, 'a', "+ indicates marker presence : "
                    "- indicates marker was not detected")

        # LISTERIA TABLE
        if genus == 'Listeria':
            genesippr_table_columns = (
                bold('ID'),
                bold(pl.NoEscape(r'IGS{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'hlyA{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'inlJ{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(pl.Tabular('|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        # ID
                        # lsts_id = df.loc[df['SeqID'] == sample_id]['SampleName'].values[0]
                        lsts_id = seq_lsts_dict[sample_id]

                        # Genus
                        genus = df.loc[df['SeqID'] ==
                                       sample_id]['Genus'].values[0]

                        # MLST/rMLST
                        mlst = str(df.loc[df['SeqID'] == sample_id]
                                   ['MLST_Result'].values[0]).replace(
                                       '-', 'New')
                        rmlst = str(df.loc[df['SeqID'] == sample_id]
                                    ['rMLST_Result'].values[0]).replace(
                                        '-', 'New')

                        # Markers
                        marker_list = df.loc[df['SeqID'] == sample_id][
                            'GeneSeekr_Profile'].values[0]
                        (igs, hlya, inlj) = '-', '-', '-'
                        if 'IGS' in marker_list:
                            igs = '+'
                        if 'hlyA' in marker_list:
                            hlya = '+'
                        if 'inlJ' in marker_list:
                            inlj = '+'

                        table.add_row((lsts_id, igs, hlya, inlj, mlst, rmlst))
                    table.add_hline()
                create_caption(
                    genesippr_section, 'a', "+ indicates marker presence : "
                    "- indicates marker was not detected")

        # SALMONELLA TABLE
        if genus == 'Salmonella':
            genesippr_table_columns = (
                bold('ID'),
                bold(
                    pl.NoEscape(
                        r'Serovar{\footnotesize \textsuperscript {a}}')),
                bold(
                    pl.NoEscape(
                        r'Serogroup{\footnotesize \textsuperscript {a,b}}')),
                bold(pl.NoEscape(r'H1{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'H2{\footnotesize \textsuperscript {a}}')),
                bold(pl.NoEscape(r'invA{\footnotesize \textsuperscript {b}}')),
                bold(pl.NoEscape(r'stn{\footnotesize \textsuperscript {b}}')),
                bold(pl.NoEscape(r'MLST')),
                bold(pl.NoEscape(r'rMLST')),
            )

            with doc.create(
                    pl.Subsection('GeneSeekr Analysis',
                                  numbering=False)) as genesippr_section:
                with doc.create(
                        pl.Tabular('|c|p{2cm}|c|c|c|c|c|c|c|')) as table:
                    # Header
                    table.add_hline()
                    table.add_row(genesippr_table_columns)

                    # Rows
                    for sample_id, df in metadata_reports.items():
                        table.add_hline()

                        # ID
                        # lsts_id = df.loc[df['SeqID'] == sample_id]['SampleName'].values[0]
                        lsts_id = seq_lsts_dict[sample_id]

                        # MLST/rMLST
                        mlst = str(df.loc[df['SeqID'] == sample_id]
                                   ['MLST_Result'].values[0]).replace(
                                       '-', 'New')
                        rmlst = str(df.loc[df['SeqID'] == sample_id]
                                    ['rMLST_Result'].values[0]).replace(
                                        '-', 'New')

                        # Serovar
                        serovar = df.loc[df['SeqID'] ==
                                         sample_id]['SISTR_serovar'].values[0]
                        # If the serovar is particularly long, tables end up being longer than the page.
                        # To fix, try to find a space somewhere near the middle of the serovar string and insert a
                        # newline there.

                        if len(serovar) > 12:
                            # First, find what index a space is that we can change.
                            starting_index = int(len(serovar) / 2)
                            index_to_change = 999
                            for i in range(starting_index, len(serovar)):
                                if serovar[i] == ' ':
                                    index_to_change = i
                                    break
                            if index_to_change != 999:
                                serovar_with_newline = ''
                                for i in range(len(serovar)):
                                    if i == index_to_change:
                                        serovar_with_newline += '\\newline '
                                    else:
                                        serovar_with_newline += serovar[i]
                                serovar = pl.NoEscape(r'' +
                                                      serovar_with_newline)

                        # SISTR Serogroup, H1, H2
                        sistr_serogroup = df.loc[df['SeqID'] == sample_id][
                            'SISTR_serogroup'].values[0]
                        sistr_h1 = df.loc[df['SeqID'] == sample_id][
                            'SISTR_h1'].values[0].strip(';')
                        sistr_h2 = df.loc[df['SeqID'] == sample_id][
                            'SISTR_h2'].values[0].strip(';')

                        # Markers
                        marker_list = df.loc[df['SeqID'] == sample_id][
                            'GeneSeekr_Profile'].values[0]
                        (inva, stn) = '-', '-'
                        if 'invA' in marker_list:
                            inva = '+'
                        if 'stn' in marker_list:
                            stn = '+'

                        table.add_row(
                            (lsts_id, serovar, sistr_serogroup, sistr_h1,
                             sistr_h2, inva, stn, mlst, rmlst))
                    table.add_hline()

                create_caption(
                    genesippr_section, 'a',
                    "Predictions conducted using SISTR "
                    "(Salmonella In Silico Typing Resource)")
                create_caption(
                    genesippr_section, 'b', "+ indicates marker presence : "
                    "- indicates marker was not detected")

        # AMR TABLE (VTEC and Salmonella only)
        create_amr_profile = False  # only create if an AMR profile exists for one of the provided samples
        amr_samples = []  # keep track of which samples to create rows for

        # Grab AMR profile as a pre-check to see if we should even create the AMR Profile table
        for sample_id, df in metadata_reports.items():
            profile = df.loc[df['SeqID'] == sample_id]['AMR_Profile'].values[0]
            parsed_profile = extract_report_data.parse_amr_profile(profile)
            if parsed_profile is not None:
                if genus == 'Salmonella':
                    amr_samples.append(sample_id)
                    create_amr_profile = True
                elif genus == 'Escherichia':
                    if sample_id in vt_sample_list:  # vt_sample_list contains all vt+ sample IDs
                        amr_samples.append(sample_id)
                        create_amr_profile = True
                elif genus == 'Vibrio':
                    amr_samples.append(sample_id)
                    create_amr_profile = True

        # Create table
        if (genus == 'Salmonella' or some_vt is True
                or genus == 'Vibrio') and create_amr_profile is True:
            with doc.create(
                    pl.Subsection('Antimicrobial Resistance Profiling',
                                  numbering=False)):
                with doc.create(pl.Tabular('|c|c|c|c|')) as table:
                    amr_columns = (bold('ID'),
                                   bold(pl.NoEscape(r'Resistance')),
                                   bold(pl.NoEscape(r'Gene')),
                                   bold(pl.NoEscape(r'Percent Identity')))
                    # Header
                    table.add_hline()
                    table.add_row(amr_columns)
                    # Keep track of what previous id and resistance were so we know how far to draw lines across
                    # table. Initialize to some gibberish.
                    previous_id = 'asdasdfasdfs'
                    previous_resistance = 'akjsdhfasdf'
                    # For the AMR table, don't re-write sample id if same sample has multiple resistances
                    # Also, don't re-write resistances if same resistance has multiple genes.
                    for sample_id, df in metadata_reports.items():
                        if sample_id in amr_samples:
                            # Grab AMR profile
                            profile = df.loc[df['SeqID'] == sample_id][
                                'AMR_Profile'].values[0]
                            # Parse and iterate through profile to generate rows
                            parsed_profile = extract_report_data.parse_amr_profile(
                                profile)
                            if parsed_profile is not None:
                                # Rows
                                for value in parsed_profile:
                                    # ID
                                    resistance = value.resistance
                                    res_to_write = resistance
                                    lsts_id = seq_lsts_dict[sample_id]
                                    # If sample we're on is different from previous sample, line goes all the
                                    # way across the table.
                                    if lsts_id != previous_id:
                                        table.add_hline()
                                        id_to_write = lsts_id
                                    # If sample is same and resistance is same, only want lines for gene and percent
                                    # identity columns. Don't write out id or resistance again.
                                    elif resistance == previous_resistance:
                                        table.add_hline(start=3, end=4)
                                        id_to_write = ''
                                        res_to_write = ''
                                    # Finally, if resistance is different, but id is same, need line across for
                                    # resistance, gene, and percent id. Write out everything but id
                                    else:
                                        table.add_hline(start=2, end=4)
                                        id_to_write = ''
                                    previous_id = lsts_id
                                    previous_resistance = resistance

                                    # Gene
                                    gene = value.gene

                                    # Identity
                                    identity = value.percent_id

                                    # Add row
                                    table.add_row((id_to_write, res_to_write,
                                                   gene, identity))
                    # Close off table
                    table.add_hline()

        # SEQUENCE TABLE
        with doc.create(
                pl.Subsection('Sequence Quality Metrics', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|c|')) as table:
                # Columns
                sequence_quality_columns = (
                    bold('ID'),
                    bold(pl.NoEscape(r'Total Length')),
                    bold(pl.NoEscape(r'Coverage')),
                    bold(pl.NoEscape(r'GDCS')),
                    bold(pl.NoEscape(r'Pass/Fail')),
                )

                # Header
                table.add_hline()
                table.add_row(sequence_quality_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()

                    # Grab values
                    # lsts_id = df.loc[df['SeqID'] == sample_id]['SampleName'].values[0]
                    lsts_id = seq_lsts_dict[sample_id]
                    total_length = df.loc[df['SeqID'] ==
                                          sample_id]['TotalLength'].values[0]
                    average_coverage_depth = df.loc[df['SeqID'] == sample_id][
                        'AverageCoverageDepth'].values[0]

                    # Fix coverage
                    average_coverage_depth = format(
                        float(str(average_coverage_depth).replace('X', '')),
                        '.0f')
                    average_coverage_depth = str(average_coverage_depth) + 'X'

                    # Matches
                    matches = gdcs_dict[sample_id][0]

                    passfail = gdcs_dict[sample_id][1]
                    if passfail == '+':
                        passfail = 'Pass'
                    elif passfail == '-':
                        passfail = 'Fail'
                        idiot_proofing_list.append(
                            '{} failed GDCS validation'.format(sample_id))

                    # Add row
                    table.add_row((lsts_id, total_length,
                                   average_coverage_depth, matches, passfail))
                table.add_hline()

        # PIPELINE METADATA TABLE
        pipeline_metadata_columns = (bold('ID'), bold('Seq ID'),
                                     bold('Pipeline Version'),
                                     bold('Database Version'))

        with doc.create(pl.Subsection('Pipeline Metadata', numbering=False)):
            with doc.create(pl.Tabular('|c|c|c|c|')) as table:
                # Header
                table.add_hline()
                table.add_row(pipeline_metadata_columns)

                # Rows
                for sample_id, df in metadata_reports.items():
                    table.add_hline()

                    # ID
                    # lsts_id = df.loc[df['SeqID'] == sample_id]['SampleName'].values[0]
                    lsts_id = seq_lsts_dict[sample_id]

                    # Pipeline version
                    pipeline_version = df.loc[
                        df['SeqID'] == sample_id]['PipelineVersion'].values[0]
                    database_version = pipeline_version

                    # Add row
                    table.add_row((lsts_id, sample_id, pipeline_version,
                                   database_version))

                table.add_hline()

        # 'VERIFIED BY' FIELD
        with doc.create(pl.Subsubsection('Verified by:', numbering=False)):
            with doc.create(Form()):
                doc.append(pl.Command('noindent'))
                doc.append(
                    pl.Command('TextField',
                               options=[
                                   "name=verifiedbybox", "multiline=false",
                                   pl.NoEscape("bordercolor=0 0 0"),
                                   pl.NoEscape("width=2.5in"), "height=0.3in"
                               ],
                               arguments=''))

    # OUTPUT PDF FILE
    pdf_file = os.path.join(work_dir,
                            '{}_{}_{}'.format(report_id, genus, date))

    try:
        doc.generate_pdf(pdf_file, clean_tex=False)
    except:
        pass

    pdf_file += '.pdf'
    return pdf_file, idiot_proofing_list
    #computation of the urban night light 
    
    urb_NL = np.sum(np.where(urban_arr==0, arr_NL, 0)) 
    urban_night_light.append(urb_NL)
    
        
    #computation of the average urban night light
    
    masked_array = np.ma.masked_array(arr_NL, mask = urban_arr) 
    avg_urban_NL = masked_array.mean()
    urban_avg_night_light.append(round(avg_urban_NL,3))
    

        
# Assemble a two-column PyLaTeX summary table ("|cc|": two centred columns
# with vertical rules on the outer edges). `doc` is created here, but the
# table is not attached to it anywhere in this span.
doc = pylatex.Document()
table = pylatex.Tabular("|cc|")

# Header row: one bold label per column, framed by horizontal rules.
table.add_hline()
table.add_row([
    pylatex.utils.bold(_label)
    for _label in ("Population p99 p99 2000", "Population p99 p99 2019")
])

# Each section is a full-width title cell followed by its data row, with a
# horizontal rule before each of the two rows. No rule is drawn after the
# final data row.
# NOTE(review): the urban_* sequences are defined outside this span —
# presumably one value per column; confirm against the code that fills them.
for _section_title, _section_values in (
        ("Urban Population", urban_population),
        ("Average Urban Population", urban_avg_population),
        ("Urban Built", urban_built),
):
    table.add_hline()
    table.add_row(
        [pylatex.MultiColumn(2, align="|c|", data=_section_title)])
    table.add_hline()
    table.add_row(_section_values)
Ejemplo n.º 27
0
def table() -> pylatex.Table:
    """
    Build the table comparing optics figure/roughness requirements to metrology.

    For the primary and the grating, the table lists the RMS slope error, RMS
    roughness and RMS microroughness of the as-designed single-channel model
    (``Requirement`` column) next to the as-measured flight optics
    (``Measured`` column).  The integration/sample lengths and the period
    ranges shown underneath each value come from the as-designed model.

    Returns
    -------
    pylatex.Table
        The table float holding the centered tabular, its caption and the
        ``table:error`` label.
    """
    optics_single = optics.as_designed_single_channel()
    optics_all = esis.flight.optics.as_measured()
    unit_length_integration = u.mm
    unit_length_sample = u.mm
    unit_slope_error = u.urad
    unit_ripple_period = u.mm
    unit_ripple = u.nm
    unit_microroughness_period = u.um
    unit_microroughness = u.nm

    def add_optic_rows(tabular, name, designed, measured, average_measured):
        """Append the seven rows describing one optical element."""

        def measured_value(quantity, unit):
            # The grating metrology is reduced with ``.mean()`` before
            # formatting, the primary metrology is formatted directly.
            # NOTE(review): presumably the grating has one measurement per
            # channel -- confirm.
            value = quantity.to(unit).value
            return value.mean() if average_measured else value

        def add_value_row(first_cell, parameter, unit, designed_error,
                          measured_error):
            tabular.add_row([
                first_cell,
                f'{parameter} ({unit:latex_inline})',
                f'{designed_error.value.to(unit).value:0.1f}',
                f'{measured_value(measured_error.value, unit):0.1f}',
            ])

        def add_detail_row(text):
            # Detail rows only fill the ``Parameter`` column.
            tabular.add_row([r'', text, r'', r''])

        def add_periods_row(error, unit):
            periods_min = error.periods_min.to(unit).value
            periods_max = error.periods_max.to(unit).value
            # Raw f-strings keep the LaTeX backslashes literal without relying
            # on invalid escape sequences such as ``\q`` or ``\,``.
            add_detail_row(
                rf'\quad Periods = ${periods_min:0.2f}-{periods_max:0.1f}$'
                rf'\,{unit:latex_inline}')

        slope_error = designed.slope_error
        length_integration = slope_error.length_integration.to(
            unit_length_integration).value
        length_sample = slope_error.length_sample.to(unit_length_sample).value
        add_value_row(name, 'RMS slope error', unit_slope_error, slope_error,
                      measured.slope_error)
        add_detail_row(
            rf'\quad Integration length = {length_integration:0.1f}'
            rf'\,{unit_length_integration:latex_inline}')
        add_detail_row(
            rf'\quad Sample length = {length_sample:0.1f}'
            rf'\,{unit_length_sample:latex_inline}')

        add_value_row(r'', 'RMS roughness', unit_ripple, designed.ripple,
                      measured.ripple)
        add_periods_row(designed.ripple, unit_ripple_period)

        # Convert with the microroughness unit (the same unit that is printed
        # in the row label) instead of the ripple unit.
        add_value_row(r'', 'RMS microroughness', unit_microroughness,
                      designed.microroughness, measured.microroughness)
        add_periods_row(designed.microroughness, unit_microroughness_period)

    result = pylatex.Table()
    # Ask pylatex for the starred variant of the environment name.
    result._star_latex_name = True
    with result.create(pylatex.Center()) as centering:
        with centering.create(pylatex.Tabular('llrr')) as tabular:
            # Cells contain raw LaTeX, so they must not be escaped.
            tabular.escape = False
            tabular.add_row([
                r'Element',
                r'Parameter',
                r'Requirement',
                r'Measured',
            ])
            tabular.add_hline()
            add_optic_rows(tabular, r'Primary', optics_single.primary,
                           optics_all.primary, average_measured=False)
            tabular.add_hline()
            add_optic_rows(tabular, r'Grating', optics_single.grating,
                           optics_all.grating, average_measured=True)
            tabular.add_hline()

    result.add_caption(
        pylatex.NoEscape(r"""
Figure and surface roughness requirements compared to metrology for the \ESIS\ optics.
Slope error (both the numerical estimates and the measurements) is worked out with integration length and sample length 
defined per ISO 10110."""))
    result.append(kgpy.latex.Label('table:error'))
    return result
Ejemplo n.º 28
0
    def tabulateAll(self):
        """
        Create a table that summarises all input variables and additive
        components, including the constant kernel as baseline and the final
        full additive model.

        Rows are ordered by classification error (rounded to 4 decimals) and,
        within equal errors, by NLML (rounded to 2 decimals).  The smallest
        NLML and the smallest error are set in bold, and the baseline row is
        set in italics.  The table is appended to ``self.doc``; nothing is
        returned.
        """
        # 1-D variables (sliced so that self.best1d itself is not extended)
        ks = self.best1d[:]
        # Baseline: constant kernel
        ks.append(self.constker)
        # Additive components, if not 1-D
        ks.extend(k for k in self.summands if len(k.getActiveDims()) > 1)
        # Full additive model, if involves more than one additive term.
        # self.history is only read when the model is actually listed.
        if len(self.summands) > 1:
            ks.append(self.history[-1])

        # Evaluate both metrics once per kernel and reuse them for sorting,
        # for the minima and for the printed cells.
        scored = [(k, k.getNLML(), k.error()) for k in ks]
        # One sort on (error, NLML) gives the same order as a stable sort by
        # NLML followed by a stable sort by error.
        scored.sort(key=lambda entry: (round(entry[2], 4), round(entry[1], 2)))

        nlml_min = round(min(nlml for _, nlml, _ in scored), 2)
        error_min = round(min(err for _, _, err in scored), 4)

        doc = self.doc
        with doc.create(pl.Table(position='htbp!')) as tab:
            caption_str = "Classification performance of the full model, its additive components (if any), all input variables, and the baseline."
            tab.add_caption(ut.NoEscape(caption_str))

            t = pl.Tabular('rlrr')
            # Header
            t.add_hline()
            t.add_row((pl.MultiColumn(1, align='c', data='Dimensions'),
                       pl.MultiColumn(1, align='c', data='Kernel expression'),
                       pl.MultiColumn(1, align='c', data='NLML'),
                       pl.MultiColumn(1, align='c', data='Error')))
            t.add_hline()

            # Entries
            for k, nlml, err in scored:
                nlml_text = '{0:.2f}'.format(nlml)
                error_text = r'{0:.2f}\%'.format(err * 100)
                if k is self.constker:
                    # The baseline has no active dimensions to list.
                    row = [
                        ut.italic('--', escape=False),
                        ut.italic('$' + k.latex() + '$ (Baseline)',
                                  escape=False),
                        ut.italic(nlml_text, escape=False),
                        ut.italic(error_text, escape=False),
                    ]
                else:
                    dims = sorted(k.getActiveDims())
                    row = [
                        # Dimensions are printed 1-based.
                        ut.NoEscape(', '.join(str(d + 1) for d in dims)),
                        ut.NoEscape('$' + k.latex() + '$'),
                        ut.NoEscape(nlml_text),
                        ut.NoEscape(error_text),
                    ]
                # Highlight every row that ties for the best value.
                if round(nlml, 2) == nlml_min:
                    row[2] = ut.bold(row[2])
                if round(err, 4) == error_min:
                    row[3] = ut.bold(row[3])

                t.add_row(tuple(row))

            t.add_hline()

            tab.append(ut.NoEscape(r'\centering'))
            tab.append(t)
Ejemplo n.º 29
0
def table(doc: kgpy.latex.Document) -> pylatex.Table:
    """
    Build the table of estimated photon counts, noise and signal-to-noise.

    Counts are computed for four hard-coded intensities per line, both for a
    single exposure and for a stack of exposures.  As a side effect the
    document variables ``NumExpInStack`` and ``StackedCoronalHoleSNR`` are set
    on ``doc``.

    Parameters
    ----------
    doc
        Document that receives the two variables.

    Returns
    -------
    pylatex.Table
        The table float with its caption and the ``table:counts`` label.
    """
    result = pylatex.Table()

    # Wavelengths of the two lines, looked up in the as-designed model.
    design = optics.as_designed_single_channel()
    wavelengths = design.bunch.wavelength
    ions = design.bunch.ion
    wavelength_o5 = wavelengths[np.nonzero(ions == 'o_5')[0][0]]
    # Second entry labelled 'mg_10'.
    wavelength_mg10_2 = wavelengths[np.nonzero(ions == 'mg_10')[0][1]]

    # One intensity per table column.
    unit_intensity = u.erg / u.cm**2 / u.sr / u.s
    intensity_o5 = [334.97, 285.77, 1018.65, 519.534] * unit_intensity
    intensity_mg10 = [51.43, 2.62, 397.64, 239.249] * unit_intensity

    # Energy carried by one photon of each line.
    energy_o5 = wavelength_o5.to(u.erg, equivalencies=u.spectral()) / u.photon
    energy_mg10 = wavelength_mg10_2.to(
        u.erg, equivalencies=u.spectral()) / u.photon

    measured = optics.as_measured_single_channel()
    rays = measured.rays_output

    # Sum the intensity of the unmasked rays over pupil and velocity, then
    # average over the field positions that received any signal.
    axes_summed = (rays.axis.pupil_x, rays.axis.pupil_y,
                   rays.axis.velocity_los)
    axes_averaged = (rays.axis.field_x, rays.axis.field_y)
    area = rays.intensity.copy()
    area[~rays.mask] = np.nan
    area = np.nansum(area, axis=axes_summed, keepdims=True)
    area[area == 0] = np.nan
    area = np.nanmean(area, axis=axes_averaged).squeeze()
    # NOTE(review): positions 0 and 2 are hard-coded here while the
    # wavelengths above are looked up by ion name -- confirm they agree.
    area_o5 = area[0]
    area_mg10 = area[2]

    pixel_solid_angle = (design.plate_scale.x * design.plate_scale.y * u.pix *
                         u.pix).to(u.sr)
    time_integration = design.detector.exposure_length

    counts_o5 = (intensity_o5 * area_o5 * pixel_solid_angle *
                 time_integration / energy_o5).to(u.photon)
    counts_mg10 = (intensity_mg10 * area_mg10 * pixel_solid_angle *
                   time_integration / energy_mg10).to(u.photon)
    counts_total = counts_o5 + counts_mg10

    stack_num = 12
    counts_total_stacked = counts_total * stack_num

    # Shot noise: square root of the counts, carrying the count unit.
    noise_shot = np.sqrt(counts_total.value) * counts_total.unit
    noise_shot_stacked = np.sqrt(
        counts_total_stacked.value) * counts_total.unit

    # Read noise, expressed in photons via the energy of one photon of the
    # first line.
    detector = measured.detector
    noise_read = detector.readout_noise.mean() * detector.gain.mean()
    energy_per_electron = 3.6 * u.eV / u.electron
    noise_read_o5 = (noise_read / (energy_o5 / energy_per_electron)).to(
        u.photon)
    # NOTE(review): the stacked read noise is scaled linearly with the number
    # of exposures rather than added in quadrature -- confirm this is the
    # intended (pessimistic) estimate.
    noise_read_o5_stacked = stack_num * noise_read_o5

    noise_total = np.sqrt(np.square(noise_shot) + np.square(noise_read_o5))
    noise_total_stacked = np.sqrt(
        np.square(noise_shot_stacked) + np.square(noise_read_o5_stacked))

    snr = counts_total / noise_total
    snr_stacked = counts_total_stacked / noise_total_stacked

    exposure_text = kgpy.format.quantity(time_integration,
                                         digits_after_decimal=0)
    label = f'1 $\\times$ {exposure_text} exp.'
    label_stacked = f'{stack_num} $\\times$ {exposure_text} exp.'

    doc.set_variable(
        name='NumExpInStack',
        value=str(stack_num),
    )

    # Stacked SNR of the column with the lowest intensity in the first line.
    doc.set_variable_quantity(
        name='StackedCoronalHoleSNR',
        value=snr_stacked[np.argmin(intensity_o5)],
        digits_after_decimal=1,
    )

    def cells(values, digits):
        """Format every value with a fixed number of decimals."""
        return [f'{value:0.{digits}f}' for value in values]

    with result.create(pylatex.Center()) as centering:
        with centering.create(pylatex.Tabular('lrrrr')) as tabular:
            # Cells contain raw LaTeX macros, so they must not be escaped.
            tabular.escape = False
            tabular.add_row([r'Source', r'\VR', r'\VR', r'\VR', r'\CDS'])
            tabular.add_row([
                r'Solar context',
                r'\QSShort',
                r'\CHShort',
                r'\ARShort',
                r'\ARShort',
            ])
            tabular.add_hline()
            tabular.add_hline()

            # Single exposure.
            tabular.append(f'\\multicolumn{{5}}{{c}}{{{label}}}\\\\')
            tabular.add_row([r'\OV'] + cells(counts_o5.value, 0))
            tabular.add_row([r'\MgXdim'] + cells(counts_mg10.value, 0))
            tabular.add_hline()
            tabular.add_row([r'Total'] + cells(counts_total.value, 0))
            tabular.add_row([r'Shot noise'] + cells(noise_shot.value, 1))
            tabular.add_row(
                [r'Read noise'] + [f'{noise_read_o5.value:0.1f}'] * 4)
            tabular.add_row([r'\SNRShort'] + cells(snr.value, 1))
            tabular.add_hline()
            tabular.add_hline()

            # Stack of exposures.
            tabular.append(f'\\multicolumn{{5}}{{c}}{{{label_stacked}}}\\\\')
            tabular.add_row(['Total'] + cells(counts_total_stacked.value, 0))
            tabular.add_row([r'\SNRShort'] + cells(snr_stacked.value, 1))
            tabular.add_hline()
            tabular.add_hline()

    caption = pylatex.NoEscape(r"""
Estimated signal statistics per channel (in photon counts) for \ESIS\ lines in \CH, \QS, and \AR.
Note that the \SNR\ estimates are lower bounds since charge diffusion decreases the shot noise.""")
    result.add_caption(caption)
    result.append(kgpy.latex.Label('table:counts'))
    return result
Ejemplo n.º 30
0
    def create_gar(self):
        """
        Create the GeneSippr analysis report (GAR) as a PDF.

        The report contains a 'GeneSeekr Analysis' table with one row per
        entry of ``self.samples`` and one column per entry of
        ``self.genesippr_headers``, read from
        ``self.report_data['genesippr']``, followed by a caption explaining
        the +/- notation.  The output file name is the hard-coded run folder
        path suffixed with ``_gar_<YYYY-MM-DD>``; the .tex file is kept.
        """
        print('Maketh the report!')
        # Date setup
        date = datetime.today().strftime('%Y-%m-%d')
        # NOTE(review): `year` is not used in the visible part of this method.
        year = datetime.today().strftime('%Y')

        # Page setup
        geometry_options = {
            "tmargin": "2cm",
            "lmargin": "1.8cm",
            "rmargin": "1.8cm",
            "headsep": "1cm"
        }

        doc = pylatex.Document(page_numbers=False,
                               geometry_options=geometry_options)

        # Register the header/footer page style and make it the active style.
        header = self.produce_header_footer()

        doc.preamble.append(header)
        doc.change_document_style("header")

        #
        # DOCUMENT BODY/CREATION
        with doc.create(
                pylatex.Section('GeneSippr Analysis Report', numbering=False)):
            doc.append('GeneSippr!')

            with doc.create(
                    pylatex.Subsection('GeneSeekr Analysis',
                                       numbering=False)) as genesippr_section:
                # The column spec is fixed at 19 centred, ruled columns.
                with doc.create(
                        pylatex.Tabular(
                            '|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|c|')
                ) as table:
                    # Header
                    table.add_hline()
                    table.add_row(self.genesippr_table_columns)
                    # One row per sample: the sample name followed by the
                    # value of every requested header.
                    for sample_name in self.samples:
                        table_data = [sample_name]
                        for data in self.genesippr_headers:
                            try:
                                # Debug output of the value being added.
                                print(
                                    sample_name, data,
                                    self.report_data['genesippr'][sample_name]
                                    [data])
                                table_data.append(self.report_data['genesippr']
                                                  [sample_name][data])
                            except KeyError:
                                # NOTE(review): a missing value is skipped
                                # rather than replaced by a placeholder, so
                                # the row ends up shorter than the header --
                                # confirm add_row accepts that.
                                pass
                        table.add_row(table_data)
            # The subsection's `with` block has already closed here;
            # `genesippr_section` is still bound and receives the caption.
            self.create_caption(
                genesippr_section, 'a', "+ indicates marker presence : "
                "- indicates marker was not detected")

        # Create the PDF
        # NOTE(review): the output location is a hard-coded absolute path;
        # os.path.join with a single argument returns it unchanged.
        doc.generate_pdf('{}_{}_{}'.format(
            os.path.join(
                '/home/adamkoziol/Bioinformatics/sippr/gui/161104_M02466_0002_000000000-AV4G5'
            ), 'gar', date),
                         clean_tex=False)
        # Echo the base name that was passed to generate_pdf.
        print('{}_{}_{}'.format(
            os.path.join(
                '/home/adamkoziol/Bioinformatics/sippr/gui/161104_M02466_0002_000000000-AV4G5'
            ), 'gar', date))