def __init__(self,
             runtime_total: int = 10,
             runtime_between_slices: int = 10,
             runtime_forever: bool = False,
             out_directory: str = "./",
             track_keywords: list = None,
             compression: bool = True):
    """
    Creates an instance of the GenerateDataset class.
    Requires:
    - 'runtime_total' specifies the total runtime of the data gathering.
    - 'runtime_between_slices' specifies how long it will take between each save.
      If this parameter equals 'runtime_total', then only one file will be created.
    - 'runtime_forever' will ignore 'runtime_total'.
    - 'out_directory' is simply where the data will be saved.
    - 'track_keywords' is what the Twitter API will send back. If this is left
      empty, then only stopwords will be tracked.
    - 'compression'=True will compress all saved files.
    """
    # Default to the stopword list here rather than in the signature, so
    # custom_stopwords.main() is not evaluated once at definition time
    # (the mutable-default pitfall).
    if track_keywords is None:
        track_keywords = custom_stopwords.main()
    self.runtime_total = runtime_total
    self.runtime_between_slices = runtime_between_slices
    self.runtime_forever = runtime_forever
    self.out_directory = out_directory
    self.track_keywords = track_keywords
    self.compression_enabled = compression
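# A minimal usage sketch of the constructor documented above. The import path
# below is an assumption (the real module path is not shown in this snippet),
# and the keyword values are illustrative only; run_collector() is the entry
# point used elsewhere in this project.
from packages.collection.generate_dataset import GenerateDataset  # assumed path

gen = GenerateDataset(
    runtime_total=60,              # total collection time
    runtime_between_slices=30,     # save two slices over the run
    runtime_forever=False,         # respect runtime_total instead of running forever
    out_directory="./DataCollection/",
    track_keywords=["python", "twitter"],
    compression=True)              # compress each saved slice
gen.run_collector()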
def clean_stopwords(content: str) -> str:
    """ Removes stop-words from a string and returns the cleaned string. """
    stopwords = custom_stopwords.main()  # fetch the stopword list once
    words = content.split()
    filtered = [item for item in words if item not in stopwords]
    return ' '.join(filtered)
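# Quick usage sketch for clean_stopwords(). The expected output is only
# indicative, since it depends on whatever custom_stopwords.main() returns.
sample = "this is the dataset that was collected from the stream"
print(clean_stopwords(sample))  # e.g. "dataset collected stream"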
def test(_path="../DataCollection/", _time_total=10, _time_between_slices=10, _track=None):
    run_forever = False
    if _track is None:
        _track = custom_stopwords.main()
    gen = Generate_Dataset(_time_total, _time_between_slices, run_forever, _path, _track)
    gen.run_collector()
    print("terminated")
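# Hypothetical entry point (not part of the original snippet): running the
# module directly starts a short collection into ../DataCollection/ using the
# defaults of test() above.
if __name__ == "__main__":
    test()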
def clean_stopwords(content: str) -> str:
    """ Removes stop-words from a string and returns the cleaned string. """
    stopwords = custom_stopwords.main()  # fetch the stopword list once
    words = content.split()
    filtered = [item for item in words if item not in stopwords]
    return ' '.join(filtered)
#from packages.graphical.generate_wordcloud import write_csv
from packages.cleaning.custom_stopwords import main

#mods
import csv
import seaborn as sb

# 'gd' is the dataset generator class (its import is not shown in this snippet);
# the keyword names follow the GenerateDataset constructor defined above.
generator = gd(
    runtime_total=20,
    runtime_between_slices=20,
    runtime_forever=False,
    out_directory='C:/Users/Erlend-PC/Documents/Coding/Noodle/TwitterNoodle-master/packages/',
    track_keywords=main())
generator.run_collector()

# // MERGE
# scale = DatasetScalingHelper(_verbosity = True)
# scale.set_dir_input('C:\\Users\\Joakim\\Desktop\\TwitterNoodle-master\\packages\\zIN')
# scale.set_dir_output('C:\\Users\\Joakim\\Desktop\\TwitterNoodle-master\\packages\\zOUT')
# scale.merge_datasets_by_directory()
# fp = 'C:\\Users\\Joakim\\Desktop\\TwitterNoodle-master\\200116-15_06_06--200116-15_06_11.csv'