def download(self):
    """
    Fetch the Stanford Sentiment Treebank CSV into the data directory.

    The parent directory of ``self.filename`` is created first (missing
    ancestors included, no error if it already exists); the download
    itself is delegated to ``generic_download``, which is handed
    ``SST_FILENAME`` as the output filename.
    """
    # Ensure the destination directory exists before downloading.
    Path(self.filename).parent.mkdir(parents=True, exist_ok=True)

    generic_download(
        url="https://s3.amazonaws.com/enso-data/SST-binary.csv",
        text_column="Text",
        target_column="Target",
        filename=SST_FILENAME,
    )
def _download_data(cls):
    """
    Fetch and prepare the two datasets referenced by ``cls``.

    1. Stanford Sentiment Treebank (binary) CSV: downloaded through
       ``generic_download`` unless ``cls.classifier_dataset_path`` already
       exists.
    2. Reuters named-entity XML: saved to ``cls.sequence_dataset_path``
       unless that file already exists, then parsed into per-document lists
       of token texts and labels, which are written as JSON to
       ``cls.processed_path``.

    Raises:
        requests.HTTPError: if the Reuters download returns an error status.
    """
    # --- Stanford Sentiment Treebank -------------------------------------
    classifier_path = Path(cls.classifier_dataset_path)
    # Only skip *this* download when the file is cached. Returning early
    # here would also skip the Reuters preparation below.
    if not classifier_path.exists():
        classifier_path.parent.mkdir(parents=True, exist_ok=True)
        generic_download(
            url="https://s3.amazonaws.com/enso-data/SST-binary.csv",
            text_column="Text",
            target_column="Target",
            filename=SST_FILENAME,
        )

    # --- Reuters ---------------------------------------------------------
    sequence_path = Path(cls.sequence_dataset_path)
    if not sequence_path.exists():
        sequence_path.parent.mkdir(parents=True, exist_ok=True)
        url = "https://raw.githubusercontent.com/dice-group/n3-collection/master/reuters.xml"
        response = requests.get(url)
        # Fail loudly instead of caching an HTTP error page as the dataset;
        # once the file exists it is never downloaded again.
        response.raise_for_status()
        with open(cls.sequence_dataset_path, "wb") as fp:
            fp.write(response.content)

    with codecs.open(cls.sequence_dataset_path, "r", "utf-8") as infile:
        soup = bs(infile, "html.parser")

    docs = []
    docs_labels = []
    for elem in soup.find_all("document"):
        texts = []
        labels = []

        # Walk each child of the element under "textwithnamedentities".
        for child in elem.find("textwithnamedentities").children:
            if not isinstance(child, Tag):
                continue  # skip non-Tag children (e.g. text nodes)
            if child.name == "namedentityintext":
                label = "Named Entity"  # part of a named entity
            else:
                label = "<PAD>"  # irrelevant word
            texts.append(child.text)
            labels.append(label)

        docs.append(texts)
        docs_labels.append(labels)

    with open(cls.processed_path, 'wt') as fp:
        json.dump((docs, docs_labels), fp)
def download(self):
    """
    Download the Stanford Sentiment Treebank CSV to the data directory.

    Creates the directory that will contain ``self.filename`` when it is
    missing, then asks ``generic_download`` to fetch the CSV, passing
    ``SST_FILENAME`` as the output filename.
    """
    destination_dir = Path(self.filename).parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    # Keyword arguments forwarded unchanged to ``generic_download``.
    request = {
        "url": "https://s3.amazonaws.com/enso-data/SST-binary.csv",
        "text_column": "Text",
        "target_column": "Target",
        "filename": SST_FILENAME,
    }
    generic_download(**request)
def download(self):
    """
    Download the McDonalds Yelp sentiment CSV to the data directory.

    The parent directory of ``self.filename`` is created if needed, then
    ``generic_download`` is called with the ``review`` column as text, the
    ``policies_violated`` column as target, and ``target_transform`` as the
    target transformation.
    """
    # NOTE(review): the output filename is SST_FILENAME although the URL
    # points at the McDonalds Yelp data - confirm this is intended.
    Path(self.filename).parent.mkdir(parents=True, exist_ok=True)

    source_url = (
        "https://www.figure-eight.com/wp-content/uploads/2016/03/McDonalds-Yelp-Sentiment-DFE.csv"
    )
    generic_download(
        url=source_url,
        text_column="review",
        target_column="policies_violated",
        filename=SST_FILENAME,
        target_transformation=target_transform,
    )
def download(self):
    """
    Fetch the McDonalds Yelp sentiment CSV into the data directory.

    Ensures the directory containing ``self.filename`` exists, then hands
    the download off to ``generic_download`` (text column ``review``,
    target column ``policies_violated``, targets passed through
    ``target_transform``).
    """
    output_dir = Path(self.filename).parent
    output_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): saved under SST_FILENAME even though this is not the
    # SST data - verify against the module's constants.
    download_args = {
        "url": "https://www.figure-eight.com/wp-content/uploads/2016/03/McDonalds-Yelp-Sentiment-DFE.csv",
        "text_column": "review",
        "target_column": "policies_violated",
        "filename": SST_FILENAME,
        "target_transformation": target_transform,
    }
    generic_download(**download_args)
def _download_sst(cls):
    """
    Download the Stanford Sentiment Treebank CSV unless it is already present.

    Nothing happens when ``cls.dataset_path`` exists. Otherwise its parent
    directory is created (with any missing ancestors) and
    ``generic_download`` is asked to fetch the CSV, with ``SST_FILENAME``
    as the output filename.
    """
    sst_path = Path(cls.dataset_path)
    if not sst_path.exists():
        sst_path.parent.mkdir(parents=True, exist_ok=True)
        generic_download(
            url="https://s3.amazonaws.com/enso-data/SST-binary.csv",
            text_column="Text",
            target_column="Target",
            filename=SST_FILENAME,
        )