def map_representation(structured_nps, start_words=None, ranking_algorithm=1,
                       similarity_algorithm=2, filtering_algorithm=1,
                       number_of_terms=1000, simplify_terms=False, model=None,
                       data_dump_path=None):
    """Return a pair similarity dictionary for the map and the set of terms in
    the map. The heatmap can be calculated separately and then overlaid. The
    dictionary representation still needs to be converted to dot file format."""
    flattened = flatten(structured_nps)
    set_status('ranking terms', model=model)
    if start_words is not None:
        # start words should be a list like ["machine learning", "artificial intelligence"]
        start_words = [tuple(s.split()) for s in start_words]
        ranked_phrases, phrase_frequencies, scored_phrases = call_rank(
            ranking_algorithm, flattened, number_of_terms,
            start_words=start_words, model=model)
    else:
        ranked_phrases, phrase_frequencies, scored_phrases = call_rank(
            ranking_algorithm, flattened, number_of_terms, model=model)
    if simplify_terms:
        structured_nps = simplification.term_replacement(structured_nps, ranked_phrases)
    set_status('calculating similarity', model=model)
    sim_matrix, phrase_lookups = call_similarity(
        similarity_algorithm, structured_nps, ranked_phrases, model=model,
        status_callback=lambda s: set_status(s, model=model))
    if data_dump_path:
        import pickle
        from os.path import join

        def prefix_path(rel):
            return join(data_dump_path, rel)

        # pickle requires binary file mode
        with open(prefix_path('sim_matrix.pickle'), 'wb') as f:
            pickle.dump(sim_matrix, f)
        with open(prefix_path('phrase_lookups.pickle'), 'wb') as f:
            pickle.dump(phrase_lookups, f)
        with open(prefix_path('phrase_frequencies.pickle'), 'wb') as f:
            pickle.dump(phrase_frequencies, f)
    phrase_pairs = call_filter(filtering_algorithm, sim_matrix, phrase_lookups, model=model)
    normed = similarity.similarity_dict_to_distance(phrase_pairs)
    # build set of terms in graph
    graph_terms = set()
    for term, lst in normed.items():
        graph_terms.add(term)
        graph_terms.update(term for term, val in lst)
    return normed, graph_terms, phrase_frequencies, phrase_pairs, scored_phrases
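# Hedged usage sketch for map_representation above. The noun-phrase structure
# (a list of documents, each a list of word tuples) is inferred from how
# `flatten` and `call_rank` consume it; the sample data and printed output are
# purely hypothetical and only illustrate the call signature.
if __name__ == '__main__':
    _sample_nps = [
        [('machine', 'learning'), ('neural', 'network')],
        [('machine', 'learning'), ('artificial', 'intelligence')],
    ]
    _normed, _graph_terms, _freqs, _pairs, _scores = map_representation(
        _sample_nps, start_words=['machine learning'], number_of_terms=50)
    print(sorted(_graph_terms))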
def handle_status(config, args):
    if args['clear']:
        clear_status(config.slack, config.default_statuses, config.default_dnd)
    elif args['set']:
        try:
            status = config.statuses[args['<status>']]
        except KeyError:
            print(f'{args["<status>"]} is not a valid status. Valid statuses are:')
            print_statuses_list(config.statuses)
            exit(1)
        set_status(config.slack, status, args['<time>'])
    elif args['show']:
        if args['<status>'] is None:
            print(get_status(config.slack))
        else:
            try:
                # use indexing (not .get) so an unknown status actually raises KeyError
                print(config.statuses[args['<status>']])
            except KeyError:
                print(f'{args["<status>"]} is not a valid status. Valid statuses are:')
                print_statuses_list(config.statuses)
                exit(1)
    elif args['list']:
        print_statuses_list(config.statuses)
def acquire_status(self, desired_status_string):
    # Don't need to do anything if we already have the status !!
    if mpc_status.get_status("mpc_temp_status") != desired_status_string:
        try:
            # wait to acquire lock from parallel workers
            with self.lock.acquire(timeout=timeout):
                # wait to acquire lock from any other code
                # e.g. PV uses this during identification/linking
                while mpc_status.get_status("mpc_temp_status") != desired_status_string:
                    time.sleep(np.random.rand() * 0.01)
                    if mpc_status.get_status("mpc_temp_status") == '':
                        mpc_status.set_status("mpc_temp_status", desired_status_string)
                    time.sleep(np.random.rand() * 0.01)
                assert mpc_status.get_status("mpc_temp_status") == desired_status_string, \
                    f'Problem: mpc_temp_status = {mpc_status.get_status("mpc_temp_status")}'
        except Exception as e:
            print('Problem with *acquire_status()*')
            print(e)
            print('\t:', desired_status_string)
    return mpc_status.get_status("mpc_temp_status")
def close_session(handle: LogHandle):
    """ Closes session and csvfile on disk """
    if handle and handle.file:
        print('closing logging session for file:', handle.file.name)
        status.set_status('close logging session with file: {}'.format(handle.file.name))
        handle.file.close()
def change_port(self, port: serial.Serial):
    if port and self.serial and port != self.serial.port:
        # reader thread needs to be shut down
        self._stop_reader()
        self.serial = serial.Serial(port.device, BAUDRATE, timeout=10)
        self.sendCmd('reset')
        print('open port: ', self.serial, self.serial.port)
        status.set_status('open port: {}'.format(self.serial.port))
        self._start_reader()
def call_filter(filter_index, sim_matrix, phrases, top_limit_override=None, model=None):
    """filter_index: 0 = Top; 1 = Pull in Lesser Terms; 2 = Take Top and Fill w/ Lesser"""
    filtering_fn = filtering_fns[filter_index]
    set_status('filtering and getting pairwise with %s' % filtering_fn, model=model)
    if top_limit_override:
        phrase_pairs = filtering_fn(sim_matrix, phrases, top_limit=top_limit_override)
    else:
        phrase_pairs = filtering_fn(sim_matrix, phrases)
    return phrase_pairs
def call_similarity(similarity_index, structured_nps, phrases, model=None, status_callback=None):
    """similarity_index: 0 = LSA (w/ Cosine similarity); 1 = Jaccard;
    2 = Jaccard (partial match); 3 = Distributional similarity (w/ Jensen-Shannon divergence)"""
    # similarity_fns = [similarity.lsa, similarity.jaccard_full, similarity.jaccard_partial]
    similarity_fn = similarity_fns[similarity_index]
    set_status('calculating similarity with %s' % similarity_fn, model=model)
    sim_matrix, phrases = similarity_fn(structured_nps, phrases, status_callback=status_callback)
    # with open('/tmp/sim.pickle', 'w') as f:
    #     pickle.dump(sim_matrix, f)
    return sim_matrix, phrases
def start_new_session(directory, file_prefix: str, use_csv: bool):
    """
    Parameters
    ----------
    directory: str
        The directory files will be logged to. If it does not exist, the
        logger tries to create the directory.
    file_prefix: str
        The prefix in the file name, e.g. for 'somefile' the filename will be
        'somefile_i.csv' with i the sequence number of the logging session.
        The logger will check what the latest session with 'file_prefix' in
        'directory' was and choose the next value for 'i'.

    Returns
    -------
    logging_handle: LogHandle
        Use this handle for writing data during the session.
        None if there was a problem.
    """
    # the flag is named use_csv so it does not shadow the csv module used below
    ext = 'csv' if use_csv else 'txt'
    p = Path(directory)
    if not p.exists():
        try:
            p.mkdir(parents=True)
        except Exception as e:
            print('error creating log dir: \n', e)
            return None
    files = list(p.glob(file_prefix + '*' + ext))
    max_i = -1
    for f in files:
        try:
            i = int(f.stem[len(file_prefix + '_'):])
            max_i = max(i, max_i)
        except ValueError as e:
            print(e)
    try:
        fname = p / '{}_{}.{}'.format(file_prefix, max_i + 1, ext)
        csvfile = open(str(fname), 'w', newline='')
        writer = csv.writer(csvfile, delimiter=',') if use_csv else None
        print('starting new logging session with file:', str(fname))
        status.set_status('starting new logging session with file: {}'.format(str(fname)))
        return LogHandle(file=csvfile, writer=writer)
    except Exception as e:
        print('error opening file: \n', e)
        return None
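# Example of the logging-session lifecycle built from start_new_session and
# close_session above; a sketch only. The directory and row contents are
# illustrative, and LogHandle is assumed to expose the `file` and `writer`
# attributes used here.
def _demo_logging_session():
    handle = start_new_session('/tmp/piano_logs', 'keypress', use_csv=True)
    if handle and handle.writer:
        handle.writer.writerow(['timestamp', 'key', 'position'])
    if handle:
        close_session(handle)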
def call_graphviz(map_string, file_format='svg', model=None):
    """map_string should be a string in the dot file format, which the pipeline
    will be called on. Output is in format file_format."""
    set_status('drawing graph', model=model)
    gv_command = graphviz_command(file_format=file_format, **GRAPHVIZ_PARAMS)
    proc = Popen('echo $PATH', stdout=PIPE, shell=True)
    print("path:", proc.communicate(input='')[0])
    proc = Popen(gv_command, stdout=PIPE, stdin=PIPE, shell=True)
    map_out, map_err = proc.communicate(input=map_string)
    print("return code:", proc.returncode)
    if map_err:
        print(map_err)
    return map_out
def pop_default(config):
    try:
        config.default_statuses.pop()
    except IndexError:
        pass
    config.write_config()
    if len(config.default_statuses) > 0:
        set_status(config.slack, config.default_statuses[-1])
    else:
        clear_status(config.slack)
def call_rank(ranking_index, flattened, n_large, start_words=[], model=None):
    """ranking_index: 0 = TFIDF; 1 = C-value; 2 = C-value + Unigrams; 3 = TF"""
    ranking_fn = ranking_fns[ranking_index]
    ranking_fn_name = ranking_fn_names[ranking_index]
    set_status('ranking with %s' % ranking_fn_name, model=model)
    if debug:
        print('ranking with %s' % ranking_fn_name)
    scored_phrases, phrase_frequencies = ranking_fn(flattened)
    set_status('ordering', model=model)
    if debug:
        print('ordering')
    ordered_phrases = sorted(scored_phrases.items(), key=lambda p: p[1], reverse=True)
    # ordered_fname = '../phrase_lists/%s.phrases' % ranking_index
    # print('writing ordered phrases to file %s' % ordered_fname)
    # with open(ordered_fname, 'w') as f:
    #     for o in ordered_phrases[:n_large]:
    #         f.write('%s\n' % str(o))
    if debug:
        print('mapping')
    ranked_phrases = [p[0] for p in ordered_phrases]
    if debug:
        print('trimming large')
    large_phrases = ranked_phrases[:n_large]
    if start_words:
        if debug:
            print('looking for start words', start_words)
        found_start_words = []
        for start_word in start_words:
            matches = (ranked_phrase for ranked_phrase in ranked_phrases
                       if start_word in sub_lists(ranked_phrase, proper=False))
            try:
                word = next(matches)
                if word not in large_phrases:
                    found_start_words.append(word)
            except StopIteration:
                if debug:
                    print('start word %s not found' % start_word)
        if debug:
            print('found start words', found_start_words)
        top_phrases = found_start_words + large_phrases
    else:
        top_phrases = large_phrases
    filtered_frequencies = dict(
        (phrase, freq) for (phrase, freq) in phrase_frequencies.items()
        if phrase in top_phrases)
    return top_phrases, filtered_frequencies, scored_phrases
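# Sketch of a direct call_rank invocation (index 1 = C-value per the docstring).
# The flattened phrase list mirrors what `flatten(structured_nps)` would
# produce; the phrases themselves are hypothetical.
def _demo_call_rank():
    _flattened = [
        ('machine', 'learning'), ('machine', 'learning'),
        ('neural', 'network'), ('learning',),
    ]
    top_phrases, frequencies, scores = call_rank(1, _flattened, n_large=3)
    return top_phrases, frequencies, scores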
def relinquish_status(self, desired_status_string):
    # Only attempt to change if the temp_status is what you think it is ...
    if mpc_status.get_status("mpc_temp_status") == desired_status_string:
        # wait to acquire lock from parallel workers
        with self.lock.acquire(timeout=timeout):
            # set empty status
            mpc_status.set_status("mpc_temp_status", "")
            return True
    else:
        return False
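# Hedged sketch of how acquire_status / relinquish_status above might wrap a
# critical section. `worker` stands for whatever object carries these methods;
# it is an assumption for illustration, not part of the original API.
def with_temp_status(worker, status_string, critical_section):
    """Run critical_section only while holding the shared mpc_temp_status flag."""
    if worker.acquire_status(status_string) == status_string:
        try:
            critical_section()
        finally:
            worker.relinquish_status(status_string)
        return True
    return False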
def add_default(config, status, time):
    try:
        s = config.statuses[status]
    except KeyError:
        print(f'{status} is not a valid status. Valid statuses are:')
        print_statuses_list(config.statuses)
        exit(1)
    try:
        t = Expiration.from_timestamp(time)
    except TimeFormatError as err:
        print(err)
        exit(1)
    s.status_expiration = t
    config.default_statuses.append(s)
    config.write_config()
    set_status(config.slack, s)
def filter_query(query, dirty=False, starting_year=None, ending_year=None,
                 sample_size=None, model=None):
    filtered = query
    if not dirty:
        filtered = query.filter(Grant.clean == True)
    if ending_year is not None:
        filtered = filtered.filter(Grant.published_year <= ending_year)
    if starting_year is not None:
        filtered = filtered.filter(Grant.published_year >= starting_year)
    if model is not None:
        documents_in_set = filtered.count()
        model.documents_in_set = documents_in_set
        set_status("%d documents met filtering criteria" % documents_in_set)
    if sample_size is not None:
        filtered = filtered.order_by(func.rand()).limit(sample_size)
        if model is not None:
            documents_sampled = filtered.count()
            model.documents_sampled = documents_sampled
            set_status("%d documents were sampled" % documents_sampled)
    return filtered
def main():
    s = socket.socket()
    ai = socket.getaddrinfo("0.0.0.0", 80)
    print("Bind address info:", ai)
    addr = ai[0][-1]

    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind(addr)
    s.listen(5)
    print("Listening")

    while True:
        res = s.accept()
        client_sock = res[0]
        client_addr = res[1]
        print("Client address:", client_addr)
        print("Client socket:", client_sock)

        req = client_sock.recv(128)
        cmd = get_command(req)
        print("Request:")
        print(cmd)
        print()

        if cmd == "/status":
            client_sock.write(CONTENT % status.status)
        elif cmd == "/on":
            status.set_status(1)
            client_sock.write(CONTENT % status.status)
        elif cmd == "/off":
            status.set_status(0)
            client_sock.write(CONTENT % status.status)
        elif cmd == "/c":
            client_sock.write(CONTENT % status.color_hex)
        elif cmd[:3] == "/c/":
            status.set_color(cmd[3:])
            client_sock.write(CONTENT % "OK")
        else:
            client_sock.write(CONTENT % "OK")

        client_sock.close()
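# `get_command` is used above but not shown; a minimal sketch of what it
# presumably does (pull the request path out of the first request line).
# This is an assumption for illustration, not the original helper.
def get_command(req: bytes) -> str:
    try:
        # b"GET /status HTTP/1.1..." -> "/status"
        return req.split(b' ')[1].decode()
    except (IndexError, UnicodeError):
        return ''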
def __init__(self):
    super(PianoApp, self).__init__()

    self.SerialConnection = SerialConnection()

    self.window = MainWindow()
    self.window.setWindowTitle("Piano Sensor")
    self.window.show()

    signal.signal(signal.SIGINT, self.window.quit)
    self.window.closeSignal.connect(self.quit)

    self.toolbar = QtWidgets.QToolBar()
    self.window.addToolBar(self.toolbar)
    self.mainView = MainView(self.toolbar, self.SerialConnection.getDropdownWidget())
    self.window.setCentralWidget(self.mainView)

    self.mainView.refresh.connect(self.SerialConnection.refresh)
    self.mainView.resetEncoders.connect(lambda: self.SerialConnection.sendCmd('reset'))
    self.mainView.resetSystem.connect(lambda: self.SerialConnection.sendCmd('sysreset'))
    self.mainView.getPositions.connect(lambda: self.SerialConnection.sendCmd('pos'))

    self.parser = SerialParser()
    self.SerialConnection.textStream.connect(self.parser.parse_line)
    # self.SerialConnection.textStream.connect(self.mainView.textOutputView.addText)
    self.parser.comment.connect(self.mainView.textOutputView.addComment)
    self.parser.newDataSet.connect(
        lambda i, t, p: self.mainView.resultsView.new_results(KeyPress(i, t, p)))
    self.parser.newDataSet.connect(
        lambda i, t, p: self.mainView.textOutputView.new_results(KeyPress(i, t, p)))

    status.set_status_logger(self.set_status_message)
    status.set_status('Piano Sensor Ready..')
def make_basemap(basemap):
    try:
        set_status('getting document list', model=basemap)
        with ManagedSession() as session:
            filtered_query = create_query_for_model(session, basemap, dirty=False)
            extracted_terms = extract_terms(filtered_query, basemap.term_type)
        if not extracted_terms:
            raise Exception('No documents found matching query!')
        map_dict, graph_terms, phrase_frequencies, unnormed_dict, phrase_scores = map_representation(
            extracted_terms,
            ranking_algorithm=basemap.ranking_algorithm,
            similarity_algorithm=basemap.similarity_algorithm,
            filtering_algorithm=basemap.filtering_algorithm,
            number_of_terms=basemap.number_of_terms,
            model=basemap)
        # map_string will be a graphviz-processable string
        # map_string = write_dot.output_pairs_dict(map_dict, True, phrase_frequencies=phrase_frequencies, true_scaling=True).decode('ascii', 'ignore')
        map_string = write_dot.output_pairs_dict(
            map_dict, True, phrase_frequencies=phrase_frequencies,
            true_scaling=True, similarities=unnormed_dict,
            phrase_scores=phrase_scores).decode('ascii', 'ignore')
        # save to database
        basemap.dot_rep = map_string
        # basemap.phrase_frequencies = json.dumps(jsonize_phrase_dict(phrase_frequencies), indent=4).decode('ascii', 'ignore')
        # get phrases as a list of lists of strings (one list of words per term)
        basemap.phrases_in_map = json.dumps(jsonize_phrase_set(graph_terms, None)).decode('ascii', 'ignore')
        basemap.save()
        svg_str, width, height = strip_dimensions(
            call_graphviz(map_string, file_format='svg', model=basemap))
        basemap.svg_rep = svg_str
        basemap.width = width
        basemap.height = height
        basemap.finished = True
        basemap.save()
        set_status('basemap complete', model=basemap)
        print('basemap complete')
        return map_dict, graph_terms
    except ZeroDivisionError as e:
        set_status('Error: too few documents to produce a map. Try a broader search',
                   model=basemap)
def make_heatmap(heatmap, graph_terms):
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            filtered_query = create_query_for_model(session, heatmap, dirty=False)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
        heatmap_terms = flatten(extracted_terms)
        heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
        heatmap.terms = json.dumps(jsonize_phrase_dict(heatmap_vals, 'intensity'))
        set_status('heatmap complete', model=heatmap)
        heatmap.finished = True
        heatmap.save()
        return heatmap_vals
    except Exception as e:
        set_status('Error: %s' % e, model=heatmap)
        raise e
def make_heatmap(heatmap, graph_terms):
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            heatmap_query = create_query(session, author=heatmap.author,
                                         institution=heatmap.institution)
            filtered_query = filter_query(heatmap_query, dirty=False,
                                          starting_year=heatmap.starting_year,
                                          ending_year=heatmap.ending_year,
                                          sample_size=heatmap.sample_size,
                                          model=heatmap)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
        heatmap_terms = flatten(extracted_terms)
        heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
        heatmap.terms = json.dumps(jsonize_phrase_dict(heatmap_vals, 'intensity'))
        set_status('heatmap complete', model=heatmap)
        heatmap.finished = True
        heatmap.save()
        return heatmap_vals
    except Exception as e:
        set_status('Error: %s' % e, model=heatmap)
        raise e
def fix_primary_flat_file_data(desig, incorrect_list, correct_list, DELETING=False):
    '''
    *** Need to be really careful about this ***
    *** Need to do something like ... ***
    *** (i)    Find the relevant primary data file [can be in /sa/mpn or in tot*]
    *** (ii)   Freeze the system (lock status)
    *** (iii)  Copy primary data file to temp location (esp. while developing)
    *** (iv)   Find the location of the incorrect data in the primary data file
    *** (v)    Replace the incorrect data with the correct data (non-trivial : needs to have pubn-record, etc)
    *** (vi)   Do some sense checks of the difference between the initial and fixed versions
    *** (vii)  Write the data to the temp file
    *** (viii) Replace the primary data with the fixed copy
    *** (ix)   Unlock the system

    inputs:
    -------

    returns:
    --------
    '''
    # We want to 'permanently' save some output files ...
    save_dir = '/sa/conchecks/data_products/'

    # *** (i) Find the relevant primary data file [can be in /sa/mpn or in tot*]
    src_files = []
    for incorrect_published_obs80 in incorrect_list:
        src_files.extend(find_primary_data_file(desig, incorrect_published_obs80))
    src_files = list(set(src_files))
    print('src_files = ', src_files)
    assert len(src_files), \
        f'No src_file could be found that contains the incorrect data ... incorrect_list={incorrect_list}'

    # *** (ii) Freeze the system (lock status)
    # ~~~~~ IF WE CRAP OUT AT ANY POINT BELOW WE NEED TO RELEASE THE LOCK ~~~~~
    print('Setting mpc_temp_status')
    mpc_status.set_status("mpc_temp_status", "MJP_FIXING_PRIMARY_FLAT_FILES")

    try:
        # *** (iii) Copy primary data file to temp location (esp. while developing)
        # I am allowing for the possibility that there are multiple files to be fixed ...
        dst_dir = newsub.generate_subdirectory("obs_cons")
        for src_file in src_files:
            dst_file = os.path.join(dst_dir, os.path.basename(src_file))
            print('dst_file = ', dst_file)
            shutil.copyfile(src_file, dst_file)

            # Read the primary data file
            with open(dst_file, 'r') as fh:
                data = fh.readlines()

            # Files to write to so that MR can update mysql
            bad_filepath = os.path.join(save_dir, desig + '_bad.dat')
            good_filepath = os.path.join(save_dir, desig + '_good.dat')
            print(f' bad_filepath= {bad_filepath} , good_filepath= {good_filepath} ')

            with open(bad_filepath, 'w') as bad_fh:
                with open(good_filepath, 'w') as good_fh:

                    # If we are deleting duplicates ...
                    if DELETING:
                        seen = {}
                        fixed_data = []
                        incorrect_dict = {_: True for _ in incorrect_list}
                        for line in data:
                            # If the lines are to be deleted, record to tell MR so that the mysql database can be updated
                            if line.strip('\n') in incorrect_dict and line not in seen:
                                bad_fh.write(line)
                            # If we are keeping the line ...
                            else:
                                fixed_data.append(line)
                            # Record that we have seen the line so that we can stop ourselves deleting it twice!
                            seen[line] = True

                    # If not deleting, but doing replacement ...
                    else:
                        for incorrect_published_obs80, corrected_obs80 in zip(incorrect_list, correct_list):
                            fixed_data = []

                            # Check the inputs
                            assert corrected_obs80 not in ['', ' ', [], [''], ['', '']], \
                                f'corrected_obs80 = {corrected_obs80} : not sure that this routine can cope with such input ...'
                            assert isinstance(incorrect_published_obs80, str), \
                                f'incorrect_published_obs80 is of type {type(incorrect_published_obs80)}, rather than a string'

                            # *** (iv) Find the location of the incorrect data in the primary data file
                            line_num = [i for i, line in enumerate(data)
                                        if incorrect_published_obs80.strip() in line]
                            assert len(line_num) < 3, \
                                f'len(line_num)={len(line_num)} which is >=3 which seems like a suspiciously large number so I am terminating...'

                            # *** (v) Replace the incorrect data with the correct data (the correct data has been created earlier)
                            # At the same time we also output the incorrect & correct data to some files to be used to update the MYSQL database
                            for n, line in enumerate(data):
                                if n not in line_num:
                                    # We keep the normal stuff as-is
                                    fixed_data.append(line)
                                else:
                                    # For removal from mysql
                                    bad_fh.write(line)
                                    if isinstance(corrected_obs80, str):
                                        l = corrected_obs80 if corrected_obs80[-1] == '\n' else corrected_obs80 + '\n'
                                        # Corrected data for flat files
                                        fixed_data.append(l)
                                        # Corrected data for mysql
                                        good_fh.write(l)
                                    elif isinstance(corrected_obs80, list):
                                        for _ in corrected_obs80:
                                            l = _ if _[-1] == '\n' else _ + '\n'
                                            # Corrected data for flat files
                                            fixed_data.append(l)
                                            # Corrected data for mysql
                                            good_fh.write(l)
                                    else:
                                        sys.exit(f'corrected_obs80 is of type {type(corrected_obs80)}: do not know how to process')

                            # *** (vi) Do some sense checks of the difference between the initial and fixed versions
                            assert len(fixed_data) - len(data) == len(line_num), \
                                f'Lengths do not make sense: {len(fixed_data), len(data), len(line_num)} '

                            # copy fixed data into data ready for next loop around ...
                            data = copy.deepcopy(fixed_data)

            # *** (vii) Write the data to the temp file
            replace_file = dst_file + 'replace'
            assert not os.path.isfile(replace_file), \
                f'replacement file {replace_file} already exists which is bad'
            with open(replace_file, 'w') as fh:
                for line in fixed_data:
                    l = line if line[-1] == '\n' else line + '\n'
                    fh.write(l)
            assert os.path.isfile(replace_file), \
                f'replacement file {replace_file} does NOT exist which is bad'

            # *** (viii) Replace the primary data with the fixed copy
            print(f'replacing file={src_file} with file {replace_file} ')
            # shutil.copyfile(replace_file, src_file)

            # *** (ix) Recreate the index files if necessary
            # NEED TO BE CAREFUL ABOUT THIS ...
            # (a) Mike/Dave indicated this is only necessary if the file being altered is one of the permanent,
            #     master files, rather than one of the temp *tot* files
            # (b) However, my inspection of /share/apps/mpec/publish_dou_mpec.sh, /share/apps/com/indexed/update.sh
            #     (and sub-scripts) suggests that there *ARE* some form of index files for the temp/pending/within-month files
            # (c) To gain some understanding, the monthly-prep rebuilds are done here : /sa/com/indexed/update.sh [SAME AS ABOVE]
            # (d) Given that ... calls /share/apps/com/indexed/buildnumupd.sh

            # *** (x) Remove / Tidy-up the temp files & temp dir
            # shutil.rmtree(dst_dir)
            # assert not os.path.isdir(dst_dir), f'dst_dir={dst_dir} still exists when it should NOT'

        # *** (xi) Unlock the system
        print('Unsetting the mpc_temp_status')
        mpc_status.set_status("mpc_temp_status", "")

    except Exception as e:
        print('\n' * 2)
        print('EXCEPTION IN fix_primary_flat_file_data')
        print('\n' * 2)
        print(e)
        print('\n' * 2)
        print('Unsetting the mpc_temp_status as part of the EXCEPTION handling')
        mpc_status.set_status("mpc_temp_status", "")

    return True