def build(self, input_file, output_file, output_format, asc_dir, batch_dir):
    """
    Builds the HTML index page from the test vector database (CSV) file.

    Every line of the database file is split on commas, checked to hold
    exactly 14 fields, and converted to an HTML table fragment by
    createTableData(). The fragments are merged with the static files in
    html_fragments/ and the batch pop-up markup, the '@TOTAL@' keyword is
    replaced with the number of entries processed, and the result is
    written to the output file (any existing output file is deleted first).

    Parameters
    ----------
    :param input_file: the test vector database file to be parsed.
    :param output_file: the output path to record information to (i.e. index.html).
    :param output_format: the output format, i.e. CSV or JSON (not used by this method).
    :param asc_dir: path to the directory containing .asc files and their PNGs.
    :param batch_dir: path to the directory containing text files describing batches.

    Returns
    ----------
    :return: False if the database file is empty, contains an empty row,
             contains a row without exactly 14 fields, or a table item
             could not be built (the output file is left untouched in
             these cases). None otherwise, i.e. on success and when the
             input file does not exist.
    """

    # Number of comma separated fields a valid database row must contain.
    expected_fields = 14

    if not Common.file_exists(input_file):
        print("No valid directory supplied")
        return

    # NOTE: every print below passes a single, pre-formatted string so the
    # output is identical to the original multi-item print statements.
    print("\t\tReading:  %s" % (input_file,))

    # Used to measure processing time.
    start = datetime.datetime.now()

    # Load any available batch information from the files in
    # the batch directory.
    batch_info = self.processBatchDirectory(batch_dir)

    # Read the input file; both None and an empty result mean "no data".
    entries = Common.read_file(input_file)
    if not entries:
        print('\t\tTest vector database file empty!')
        return False

    # There must be some data available. Iterate over each line in the
    # database file, and build a HTML table <td></td> item from it.
    html_rows = []

    for test_vector in entries:

        # str.split() never returns None, so an empty row has to be
        # detected on the row itself - otherwise it is reported as a
        # row with an incorrect number of parameters.
        if not test_vector.strip():
            print('\t\tTest vector parameters are empty - is the file empty (empty rows)')
            return False

        # Here are the index positions of the data items as they should
        # appear if the data is valid:
        #
        # 0  = <Filename>
        # 1  = <Batch>
        # 2  = <Type>
        # 3  = <Period (ms)>
        # 4  = <DM>
        # 5  = <Z>
        # 6  = <S/N>
        # 7  = <EPN Pulsar>
        # 8  = <Frequency>
        # 9  = <Path>
        # 10 = <Parent Dir>
        # 11 = <Size Bits>
        # 12 = <Size GB>
        # 13 = <MD5>
        parameters = test_vector.split(',')

        if len(parameters) != expected_fields:
            print('\t\tFile has incorrect number of parameters (length %d )' % len(parameters))
            return False

        table_data = self.createTableData(parameters, asc_dir, batch_info)
        if table_data is None:
            print('\t\tUnable to build HTML table item - some unknown error')
            return False

        html_rows.append(table_data)

    # Every row that reaches this point was converted successfully.
    entries_processed = len(html_rows)

    # Now merge the HTML file components.
    top = Common.read_file_as_string('html_fragments/top.html')
    middle = Common.read_file_as_string('html_fragments/middle.html')

    # Build the batch info pop-ups. Each batch_info value is indexed as
    # [0] = markup appended to the page, [1] = code appended inside the
    # document-ready handler.
    popup_html = ''
    popup_script = '\n<script>\n\t$(document).ready(function () {\n'

    if batch_info:
        for fragments in batch_info.values():
            popup_html += fragments[0]
            popup_script += fragments[1]

    # Close the script.
    popup_script += '\t});\n</script>\n'

    # Add bottom of HTML file.
    bottom = Common.read_file_as_string('html_fragments/bottom.html')

    index_html = top + ''.join(html_rows) + middle + popup_html + popup_script + bottom

    # This is a simple fudge, allowing the page to be updated
    # at certain keyword locations.
    index_html = index_html.replace('@TOTAL@', str(entries_processed))

    # Delete output file in case it exists, then build the new one.
    Common.delete_file(output_file)
    Common.append_to_file(output_file, index_html)

    # Finally get the time that the procedure finished.
    end = datetime.datetime.now()

    print("Completed file search.")
    print("Entries processed: " + str(entries_processed))
    print("Execution time:  " + str(end - start))
    print("Done parsing directory")
def WriteAsCSV(self, Type, Batch, Period, DM, Z, SNR, EPN, Freq, full_file_path, parent, file_name, output_path):
    """
    Appends one test vector record to a file in the following CSV format:

    <Filename>,<Batch>,<Type>,<Period (ms)>,<DM>,<Z>,<S/N>,<EPN Pulsar>,<Frequency>,<Path>,<Parent Dir>,<Size Bits>,<Size GB>,<MD5>

    As each file name should be unique, we can use <Filename> as a unique identifier.
    The file size and MD5 hash are obtained from the file at full_file_path.

    Parameters
    ----------
    :param Type: The type of the data file, i.e. fake pulsar or real pulsar example.
    :param Batch: The batch the test vector was generated in.
    :param Period: The pulse period of the fake pulsar.
    :param DM: The dispersion measure of the fake pulsar.
    :param Z: The acceleration applied to the injected signal.
    :param SNR: The S/N ratio of the fake pulsar.
    :param EPN: The profile file, extracted from EPN database data.
    :param Freq: The frequency the EPN data was observed at.
    :param full_file_path: the full path to the file found (written to the <Path> column).
    :param parent: the value written to the <Parent Dir> column.
    :param file_name: the value written to the <Filename> column.
    :param output_path: the output path to record information to.

    Returns
    ----------
    :return: a tuple (success, size_in_gb). (True, <size in GB>) when the
             record was appended to the output file; (False, 0) when the
             file size is None or not greater than zero, or when any
             exception is raised while gathering or writing the data.
    """

    # NOTE: every print below passes a single, pre-formatted string so the
    # output is identical to the original two-item print statements.
    print("\t\tRecording file:  %s" % (full_file_path,))

    try:
        # Get the size of the file.
        size_in_bits = Common.file_size_bits(full_file_path)

        if size_in_bits is None:
            print("\t\tError recording test vector file size None:  %s" % (file_name,))
            return False, 0

        if not size_in_bits > 0:
            print("\t\tError recording test vector file size 0:  %s" % (file_name,))
            return False, 0

        size_in_gb = DataConversions.convertBitToByte(size_in_bits, 'GB')

        # Now compute MD5 hash...
        md5_value = self.generate_file_md5(full_file_path)

        # This is the correct CSV format. Here are the index positions
        # of the data items as they should appear:
        fields = [
            file_name,          # 0  = <Filename>
            str(Batch),         # 1  = <Batch>
            Type,               # 2  = <Type>
            # Period is converted like the other numeric fields; a numeric
            # period would otherwise raise a TypeError and be reported
            # below as an MD5/size error.
            str(Period),        # 3  = <Period (ms)>
            str(DM),            # 4  = <DM>
            str(Z),             # 5  = <Z>
            str(SNR),           # 6  = <S/N>
            EPN,                # 7  = <EPN Pulsar>
            str(Freq),          # 8  = <Frequency>
            full_file_path,     # 9  = <Path>
            parent,             # 10 = <Parent Dir>
            str(size_in_bits),  # 11 = <Size Bits>
            str(size_in_gb),    # 12 = <Size GB>
            md5_value,          # 13 = <MD5>
        ]

        Common.append_to_file(output_path, ','.join(fields) + '\n')

        # Reuse the size computed above rather than converting it twice.
        return True, size_in_gb

    except Exception:
        # Best effort: a failure for one file is reported and signalled
        # to the caller, it must not abort the run.
        print("\t\tError extracting MD5/size for:  %s" % (file_name,))
        return False, 0