def update_index(self):
    """Download course files that are not yet recorded and index them.

    For every ``(course_name, files)`` pair yielded by
    ``self.scraper.get_courses_docs()``, each file whose suffix is in
    ``self.ALLOWED_EXTS`` and whose sanitized path has no ``Doc`` record
    for that course is downloaded, saved as a new ``Doc`` and passed,
    together with its extracted sentences, to ``self.add_to_index``.
    """
    for course_name, files in self.scraper.get_courses_docs():
        if not files:
            continue

        print("Checking", course_name, "for new docs.")

        # File paths already stored in the database for this course.
        known_paths = {
            record.file_path
            for record in Doc.objects(course=course_name).only("file_path")
        }

        for entry in files:
            source_path: Path = entry["file_path"]
            if source_path.suffix not in self.ALLOWED_EXTS:
                continue

            # Remove illegal characters from the path before comparing/saving.
            clean_path = str(sanitize_filepath(source_path))
            if clean_path in known_paths:
                # Already processed the file.
                # TODO: Also check updated_at of file
                continue

            print("\tDownloading", source_path.name, end=". ")
            target = get_real_path(clean_path)
            self.scraper.download_file(target, entry["file_url"])
            print("Done.")

            # Record the new document in the database.
            new_doc = Doc(
                file_path=clean_path,
                course=course_name,
                downloaded_at=datetime.now(),
            )
            new_doc.save()

            self.add_to_index(new_doc, extract_sentences(target))
def process_query(self, query):
    """Rank documents for a search query and pick their best sentences.

    Args:
        query (str): The search query given by the user.

    Returns:
        list: One ``(file_path, best)`` tuple per result of
        ``self.fetch_top_n``. ``file_path`` is the document's ``Path`` and
        ``best`` is a tuple of at most five ``(score, sentence)`` pairs,
        ordered from the highest ``self.jc_sim`` score downwards.
    """
    processed = self.prep.preprocess(query)
    results = []
    for hit in self.fetch_top_n(processed):
        doc_path = Path(hit[0].file_path)

        # The sentences are read from the ".json" file next to the document.
        sentences_file = get_real_path(doc_path.with_suffix(".json"))
        with open(sentences_file, "r") as handle:
            unique_sentences = set(json.load(handle)["sentences"])

        scored = [
            (self.jc_sim(text, processed), text) for text in unique_sentences
        ]
        scored.sort(key=lambda pair: -pair[0])

        # Keep only the top five sentences.
        results.append((doc_path, tuple(scored[:5])))
    return results
def display_two_plots(self, f):
    """Plot the additional file ``f`` using the last plot command.

    ``f`` is first passed through ``ut.same_folder_different_file`` (with
    ``self.f_name``) and ``ut.get_real_path``. Depending on
    ``self.last_command`` the control object then draws an additional
    mesh or an additional heatmap. When ``self.last_command`` is empty
    only ``self.set_limit_text()`` is called.
    """
    # If someone gives us a ~/XXXX path we need to extract the full path
    f = ut.same_folder_different_file(self.f_name, f)
    f = ut.get_real_path(f)
    if not self.last_command.__eq__(""):
        # Read the selected contour type, the limits the control reports for
        # it, and the text currently held by the cMin / cMax widgets.
        cntr_t = self.contour_type.currentText()
        dcmin, dcmax = self.control.get_contour_limits(cntr_t)
        c_min = self.cMin.text()
        c_max = self.cMax.text()
    if self.last_command.__eq__("mesh"):
        self.control.plot_additional_mesh(f)
        self.set_title_name("Mesh ")
        self.last_command = "mesh"
        self.post_plot()
    elif self.last_command.__eq__("heatmap"):
        # Same values as read above, except that the contour type is passed
        # through str() for the limits lookup.
        cntr_t = self.contour_type.currentText()
        dcmin, dcmax = self.control.get_contour_limits(str(cntr_t))
        c_min = self.cMin.text()
        c_max = self.cMax.text()
        # Reject equal limits, and a negative lower limit when the log-scale
        # box is checked.
        if c_min == c_max or (self.contourLogScale.isChecked() and float(c_min) < 0):
            self.error_message("Error in contour limits.\n")
        else:
            self.control.plot_additional_heatmap(cntr_t, c_min, c_max, f, self.contourLogScale.isChecked())
            self.set_title_name("Color map ")
            self.last_command = "heatmap"
            if c_min == dcmin and c_max == dcmax:
                self.reset_contour_callback()
            self.post_plot()
    # The widget limits equal the limits reported by the control: call the
    # contour reset callback (this comparison is also made in the heatmap
    # branch above).
    if not self.last_command.__eq__("") and c_min == dcmin and c_max == dcmax:
        self.reset_contour_callback()
    if self.last_command == "":
        self.set_limit_text()
def validate_index(f):
    """Print every indexed file whose size on disk disagrees with the index.

    Args:
        f: Path of the index file that is parsed into a ``FileList``.

    Each entry is resolved with ``get_real_path``. The resolved path is
    printed when ``os.path.getsize`` differs from the entry's ``size`` or
    when the size cannot be read (``OSError``).
    """
    # Read through a context manager so the index file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(f, 'rb') as index_fp:
        raw_index = index_fp.read()
    fl = FileList()
    fl.ParseFromString(raw_index)

    # A separate loop name keeps the ``f`` parameter from being rebound.
    for entry in fl.files:
        url = get_real_path(entry)
        try:
            if os.path.getsize(url) != entry.size:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(url)
        except OSError:
            print(url)
def validate_index(f):
    """Report indexed files that are missing or have an unexpected size.

    Args:
        f: Path of the index file that is parsed into a ``FileList``.

    For each entry the path returned by ``get_real_path`` is printed when
    its size on disk is not equal to the entry's ``size``, or when
    ``os.path.getsize`` raises ``OSError``.
    """
    # Use a context manager so the index file handle is always closed.
    with open(f, 'rb') as index_fp:
        raw_index = index_fp.read()
    fl = FileList()
    fl.ParseFromString(raw_index)

    # Iterate under a distinct name so the ``f`` parameter is not shadowed.
    for entry in fl.files:
        url = get_real_path(entry)
        try:
            if os.path.getsize(url) != entry.size:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(url)
        except OSError:
            print(url)
def export_package(index_file, outdir, unpack=False):
    """Copy every file listed in an index, plus the index itself, to ``outdir``.

    Args:
        index_file: Path of the index file that is parsed into a
            ``FileList``. Source files are read relative to its directory.
        outdir: Destination directory; it must not exist yet.
        unpack: When true each file is written under the entry's ``url``,
            otherwise under the path returned by ``get_real_path``.

    Raises:
        SystemExit: With status 1 when ``outdir`` already exists.
    """
    import sys  # local import, only needed for the early-exit path

    if os.path.exists(outdir):
        # Message reads "target directory already exists".
        print('目标目录已存在')
        # sys.exit() rather than the site-provided exit() helper, which is
        # intended for the interactive shell and is not always available.
        sys.exit(1)

    # Context manager closes the index file handle deterministically.
    with open(index_file, 'rb') as index_fp:
        raw_index = index_fp.read()
    fl = FileList()
    fl.ParseFromString(raw_index)

    assets_dir = os.path.dirname(index_file)
    for f in fl.files:
        p = get_real_path(f)
        # Same output as the two-item print statement, valid on Python 2 and 3.
        print('copy file %s' % p)
        target = os.path.join(outdir, f.url if unpack else p)
        ensure_directory_exists(target)
        shutil.copyfile(os.path.join(assets_dir, p), target)

    # The index is copied under the fixed name 'filelist'. Prepare its
    # directory the same way as for the assets so that an index with no
    # entries does not fail on a destination that was never created.
    index_target = os.path.join(outdir, 'filelist')
    ensure_directory_exists(index_target)
    shutil.copyfile(index_file, index_target)
def export_package(index_file, outdir, unpack=False):
    """Export the files named by an index, and the index itself, to ``outdir``.

    Args:
        index_file: Path of the index file that is parsed into a
            ``FileList``. Source files are read relative to its directory.
        outdir: Destination directory; it must not exist yet.
        unpack: When true each file is written under the entry's ``url``,
            otherwise under the path returned by ``get_real_path``.

    Raises:
        SystemExit: With status 1 when ``outdir`` already exists.
    """
    import sys  # local import, only needed for the early-exit path

    if os.path.exists(outdir):
        # Message reads "target directory already exists".
        print("目标目录已存在")
        # sys.exit() rather than the site-provided exit() helper, which is
        # intended for the interactive shell and is not always available.
        sys.exit(1)

    # Context manager closes the index file handle deterministically.
    with open(index_file, "rb") as index_fp:
        raw_index = index_fp.read()
    fl = FileList()
    fl.ParseFromString(raw_index)

    assets_dir = os.path.dirname(index_file)
    for f in fl.files:
        p = get_real_path(f)
        # Same output as the two-item print statement, valid on Python 2 and 3.
        print("copy file %s" % p)
        target = os.path.join(outdir, f.url if unpack else p)
        ensure_directory_exists(target)
        shutil.copyfile(os.path.join(assets_dir, p), target)

    # The index is copied under the fixed name "filelist". Prepare its
    # directory the same way as for the assets so that an index with no
    # entries does not fail on a destination that was never created.
    index_target = os.path.join(outdir, "filelist")
    ensure_directory_exists(index_target)
    shutil.copyfile(index_file, index_target)
def file_load_callback(self):
    """Run the file dialog and register the chosen file as additional plot.

    Nothing is done unless the dialog's ``exec_()`` result is truthy and
    its ``filePath`` text is non-empty. Otherwise the text is passed to
    ``ut.get_real_path`` together with ``self.f_name`` and the result is
    handed to ``self.control.set_additional_plot`` along with the state
    of the dialog's ``applyChkBox``.
    """
    accepted = self.f_dialog.exec_()
    if not accepted or self.f_dialog.filePath.text() == "":
        return

    # we need to change the file
    chosen = str(self.f_dialog.filePath.text())
    resolved = str(ut.get_real_path(chosen, self.f_name))
    apply_checked = self.f_dialog.applyChkBox.isChecked()
    self.control.set_additional_plot(apply_checked, resolved)