# Script fragment: export per-paper rows to a tab-separated file.
#
# Visible steps, in statement order:
#   1. Load the spaCy model "en_core_web_sm" into `nlp`; initialise `track`
#      and `vol` to None.
#   2. Open FILE_NAME + ".tsv" for writing and wrap it in a tab-delimited
#      csv.writer (QUOTE_MINIMAL, '"' as quote char), then write the header
#      row: Title, Authors, Pdf, Presentation, Volume_Name, Notes.
#   3. For each `p` in `page_lines`:
#        - if is_centered(p) is truthy, print "New track name:" with the
#          current `track`, set `track = is_centered(p)` and `continue`;
#        - otherwise compute `raw_text = unspace(p.get_text())`, reset `pdf`
#          and `title` to None, and when has_pdf(p) is truthy resolve its
#          result against URL with urljoin;
#        - reset `presentation` to None and test for the substring
#          "presentation" in `p.text` (the body of that test lies beyond
#          this fragment).
#
# NOTE(review): the print runs BEFORE `track` is reassigned, so it reports
#   the previous track value rather than the new one -- confirm intent.
# NOTE(review): is_centered(p) and has_pdf(p) are each called twice; their
#   definitions are not visible here, so whether that is costly is unknown.
# NOTE(review): the csv module documentation recommends opening the file
#   with newline='' when it is passed to csv.writer.
# NOTE(review): the statements below appear collapsed onto a single line
#   (line breaks/indentation lost). As written, everything after the first
#   '#' is comment text -- restore the original layout before running.
nlp = spacy.load("en_core_web_sm") track = None vol = None with open(FILE_NAME + ".tsv", 'w') as f: writer = csv.writer(f, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL) writer.writerow( ["Title", "Authors", "Pdf", "Presentation", "Volume_Name", "Notes"]) for p in page_lines: #get the track if is_centered(p): print("New track name:", track) track = is_centered(p) continue raw_text = unspace(p.get_text()) pdf = None title = None if has_pdf(p): pdf = has_pdf(p) pdf = urljoin(URL, pdf) #print(pdf) presentation = None if "presentation" in p.text:
# Script fragment: export per-paper rows to a tab-separated file (variant
# with the track assignment commented out).
#
# Visible steps, in statement order:
#   1. Load the spaCy model "en_core_web_sm" into `nlp`; initialise `track`
#      and `vol` to None.
#   2. Open FILE_NAME + ".tsv" for writing, build a tab-delimited csv.writer
#      and write the header row: Title, Authors, Pdf, Presentation,
#      Volume_Name, Notes.
#   3. For each `p` in `page_lines`: when is_centered(p) is truthy, print
#      "New track name:" with the current `track`; the `track = ...`
#      assignment that follows is commented out. The remaining statements
#      (raw_text / pdf / presentation handling) are the same shape as a
#      normal row-processing path, ending in a test for "presentation" in
#      `p.text` whose body lies beyond this fragment.
#
# NOTE(review): with `track = is_centered(p)` commented out, nothing visible
#   here ever reassigns `track`, so it would stay None -- confirm intent.
# NOTE(review): the csv module documentation recommends opening the file
#   with newline='' when it is passed to csv.writer.
# NOTE(review): the statements below appear collapsed onto a single line
#   (line breaks/indentation lost). As written, everything after the first
#   '#' is comment text -- restore the original layout before running.
nlp = spacy.load("en_core_web_sm") track = None vol = None with open(FILE_NAME + ".tsv", 'w') as f: writer = csv.writer(f, delimiter='\t', quotechar='"', quoting=csv.QUOTE_MINIMAL) writer.writerow( ["Title", "Authors", "Pdf", "Presentation", "Volume_Name", "Notes"]) for p in page_lines: #get the track if is_centered(p): print("New track name:", track) # track = is_centered(p) continue raw_text = unspace(p.get_text()) pdf = None title = None if has_pdf(p): pdf = has_pdf(p) pdf = urljoin(URL, pdf) #print(pdf) presentation = None if "presentation" in p.text: