Example #1
0
    def build(self, input_file, output_file, output_format, asc_dir, batch_dir):
        """
        Reads the target directory, and records the files found which
        have the specified file extension.

        Parameters
        ----------
        :param input_file: the test vector database file to be parsed.
        :param output_file: the output path to record information to (i.e. index.html).
        :param output_format: the output format, i.e. CSV or JSON.
        :param asc_dir: path to the directory containing .asc files and their PNGs.
        :param batch_dir: path to the directory containing text files describing batches.

        Returns
        ----------
        N/A

        """

        # Counts the test vector entries read from the test vector
        # database (CSV) file.
        entriesProcessed = 0

        if Common.file_exists(input_file):

            print "\t\tReading: ", input_file

            # Used to measure processing time.
            start = datetime.datetime.now()

            # Load any available batch information from the files in
            # the batch directory.
            batch_info = self.processBatchDirectory(batch_dir)

            # read the input file
            entries = Common.read_file(input_file)

            if entries is None:
                print '\t\tTest vector database file empty!'
                return False
            else:
                # Now check there is at least 1 entry.
                if len(entries) <= 0:
                    print '\t\tTest vector database file empty!'
                    return False
                else:
                    # There must be some data available. The next part attempts
                    # to iterate over each line in the database file, and build
                    # a HTML table <td></td> item from it. This information can
                    # then be simply copied in to a HTML file.

                    html = ""

                    for test_vector in entries:

                        # If we reach here, the file is in principle in the correct format.
                        # Here are the index positions of the data items as they should appear
                        # if the data is valid:
                        #
                        # 0  = <Filename>
                        # 1  = <Batch>
                        # 2  = <Type>
                        # 3  = <Period (ms)>
                        # 4  = <DM>
                        # 5  = <Z>
                        # 6  = <S/N>
                        # 7  = <EPN Pulsar>
                        # 8  = <Frequency>
                        # 9  = <Path>
                        # 10  = <Parent Dir>
                        # 11  = <Size Bits>
                        # 12 = <Size GB>
                        # 13 = <MD5>

                        parameters = test_vector.split(',')

                        # Check the parameters are as we expect...
                        if parameters is None:
                            print '\t\tTest vector parameters are empty - is the file empty (empty rows)'
                            return False
                        else:
                            # Now we check there are the correct number of parameters...
                            if len(parameters) != 14:
                                print '\t\tFile has incorrect number of parameters (length', len(parameters), ')'
                                return False
                            else:
                                # If here, we should have the correct number of parameters....
                                table_data = self.createTableData(parameters, asc_dir, batch_info)

                                if table_data is not None:
                                    html += table_data
                                    entriesProcessed += 1
                                else:
                                    print '\t\tUnable to build HTML table item - some unknown error'
                                    return False

                    # Now merge the HTML file components.
                    top = Common.read_file_as_string('html_fragments/top.html')
                    middle = Common.read_file_as_string('html_fragments/middle.html')

                    # Build Batch info
                    popup_html = ''
                    popup_script = '\n<script>\n\t$(document).ready(function () {\n'
                    if batch_info is not None:
                        if len(batch_info)>0:
                            for key, value in batch_info.iteritems():
                                popup_html += value[0]
                                popup_script += value[1]

                        # Close the script
                        popup_script += '\t});\n</script>\n'


                    # Add bottom of HTML file.
                    bottom = Common.read_file_as_string('html_fragments/bottom.html')

                    INDEX_HTML = top + html + middle + popup_html + popup_script + bottom

                    # Delete output file in case it exists.
                    Common.delete_file(output_file)

                    # This is a simple fudge, allowing the page to be updated
                    # at certain keyword locations.
                    INDEX_HTML = INDEX_HTML.replace('@TOTAL@', str(entriesProcessed))

                    # Now build the output file
                    Common.append_to_file(output_file, INDEX_HTML)

                    # Finally get the time that the procedure finished.
                    end = datetime.datetime.now()

                    print "Completed file search."
                    print "Entries processed:", str(entriesProcessed)
                    print "Execution time: ", str(end - start)
                    print "Done parsing directory"

        else:
            print "No valid directory supplied"
    def WriteAsCSV(self, Type, Batch, Period, DM, Z, SNR, EPN, Freq, full_file_path, parent, file_name, output_path):
        """
        Writes data to a file in the following CSV format:

        <Filename>,<Type>,<Period (ms)>,<DM>,<S/N>,<EPN Pulsar>,<Frequency>,<Path>,<Parent Dir>,<Size Bits>,<Size GB>,<MD5>

        As each file name should be unique, we can use <Filename> as a unique identifier.

        Parameters
        ----------
        :param type: The type of the data file, i.e. fake pulsar or real pulsar example.
        :param Batch: The batch the test vector was generated in.
        :param Period: The pulse period of the fake pulsar.
        :param DM: The dispersion measure of the fake pulsar.
        :param Z: The acceleration applied to the injected signal.
        :param SNR: The S/N ratio of the fake pulsar.
        :param EPN: The profile file, extracted from EPN database data.
        :param Freq: The frequency the EPN data was observed at.
        :param full_file_path: the full path to the file found.
        :param parent: the full path to the file found.
        :param file_name: the full path to the file found.
        :param output_file: the output path to record information to.

        Returns
        ----------
        :return: N/A.

        """

        print "\t\tRecording file: ", full_file_path

        # This is the correct CSV format. Here are the index positions
        # of the data items as they should appear:
        #
        # 0  = <Filename>
        # 1  = <Batch>
        # 2  = <Type>
        # 3  = <Period (ms)>
        # 4  = <DM>
        # 5  = <Z>
        # 6  = <S/N>
        # 7  = <EPN Pulsar>
        # 8  = <Frequency>
        # 9  = <Path>
        # 10 = <Parent Dir>
        # 11 = <Size Bits>
        # 12 = <Size GB>
        # 13 = <MD5>

        # Get the size of the files
        try:
            size_in_bits = Common.file_size_bits(full_file_path)

            if size_in_bits is not None:
                if size_in_bits > 0:

                    size_in_gb = DataConversions.convertBitToByte(size_in_bits, 'GB')

                    # Now compute MD5 hash...
                    md5_value = self.generate_file_md5(full_file_path)

                    output = file_name + ',' + str(Batch) +',' + Type + ',' + Period + ',' + str(DM) + ',' + str(Z)+','
                    output += str(SNR) + ',' + EPN + ',' + str(Freq) + ',' + full_file_path + ',' + parent + ','
                    output += str(size_in_bits) + ',' + str(size_in_gb) + ',' + md5_value + '\n'

                    Common.append_to_file(output_path, output)

                    return True, DataConversions.convertBitToByte(size_in_bits, 'GB')
                else:
                    print "\t\tError recording test vector file size 0: ", file_name
                    return False, 0
            else:
                print "\t\tError recording test vector file size None: ", file_name
                return False, 0

        except Exception:
            print "\t\tError extracting MD5/size for: ", file_name
            return False, 0