import pickle
import pandas as pd
import dataProcessHelper as dph
import pyLDAvis.sklearn
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA
from sklearn.preprocessing import normalize
from sklearn.model_selection import GridSearchCV

numberTopics = 10
columnName = 'combCleanLemma'

# Load the preprocessed press releases and join each token list back into a single string.
path = dph.getDataPath('pressBiTriLemma.json')
df = pd.read_json(path)
df2 = pd.DataFrame(df[columnName])
df2[columnName] = df2.apply(lambda row: ' '.join(map(str, row[columnName])), axis=1)

# Build a TF-IDF matrix over unigrams and bigrams.
vectorizer = TfidfVectorizer(strip_accents='unicode', ngram_range=(1, 2))
xTrainTfidf = vectorizer.fit_transform(df2[columnName])

# Grid search over the LDA hyperparameters.
searchParams = {'n_components': [10], 'learning_decay': [.5]}
if True:
    model = LDA()
    model = GridSearchCV(model, searchParams)
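# The grid search above is set up but never fitted in this snippet. What follows
# is a minimal sketch of fitting it and inspecting the winner, not the original
# author's code: the pyLDAvis call assumes the older pyLDAvis.sklearn API that
# is imported above, and the pickle file name is illustrative.
model.fit(xTrainTfidf)
bestLda = model.best_estimator_
print('Best params:', model.best_params_)
print('Best log-likelihood:', model.best_score_)

with open('ldaGridBest.pkl', 'wb') as f:  # illustrative path
    pickle.dump(bestLda, f)

# Interactive topic visualization in a notebook environment.
panel = pyLDAvis.sklearn.prepare(bestLda, xTrainTfidf, vectorizer, mds='tsne')
pyLDAvis.display(panel)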
import os
import uuid
import dataProcessHelper as dph

extraDict = {
    'removeWords': ['bundes'],  # Bug: words with umlauts such as 'für' cannot be filtered.
    'alpha': 1,
    'T': 150,
    'tau': 64
}

# Data preparation:
# 'settingsDict', 'modelType' and 'corpusDict' are presumably defined earlier in the original file.

## Complete the settings dict with default parameters.
settingsDict = dph.completeSettingsDict(settingsDict)

## Build the paths for the data and the model. The file name for the data is
## assembled from the settings dict.
filePath = dph.getDataPath(os.path.join('autoCreation', dph.getName(settingsDict)))
modelPath = dph.getModelPath(os.path.join(modelType, str(uuid.uuid1())))

## Create the data, or load it if it already exists.
df = dph.dataCleaningPipeline('press42.json', settingsDict)
df = dph.removingWords(df, settingsDict['gensimPreProcess'], extraDict['removeWords'])

## Optionally lowercase the words afterwards.
if False:
    df = dph.toLower(df, settingsDict['gensimPreProcess'])
    extraDict['toLower'] = True

## Build the bag-of-words corpus and a dictionary.
bowCorpus, dictionary = dph.getCorpus(df[settingsDict['gensimPreProcess']],
                                      corpusDict['noBelow'],
                                      corpusDict['noAbove'])  # final argument assumed; the original snippet is truncated here
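# Hedged sketch, not in the original snippet: alpha=1, T=150 and tau=64 match
# gensim's HdpModel defaults, so the model saved at 'modelPath' is presumably a
# hierarchical Dirichlet process model along these lines.
import gensim

hdpModel = gensim.models.HdpModel(bowCorpus,
                                  id2word=dictionary,
                                  alpha=extraDict['alpha'],
                                  T=extraDict['T'],
                                  tau=extraDict['tau'])
hdpModel.save(modelPath)

# Show the top words of the first few topics.
for topicId, topic in hdpModel.print_topics(num_topics=5, num_words=8):
    print(topicId, topic)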
import locale
import pandas as pd
import gensim
import de_core_news_sm
import dataProcessHelper as dph

nlp = de_core_news_sm.load()
locale.setlocale(locale.LC_ALL, 'de_DE')

richTextFilter = ['Zur externen Meldung', 'Zur externen Pressemitteilung']
allowedPostags = ['NOUN', 'VERB']  # ['NOUN', 'ADJ', 'VERB', 'ADV']
# Candidates for removal: 'Deutschland', 'Bundesregierung', 'wichtig', 'erklärte'

# Combine the columns 'shortText' and 'richText'. What about the title?
if True:
    path = dph.getDataPath('press42.json')
    df = pd.read_json(path)
    df['combText'] = df.apply(
        lambda row: df['shortText'].iloc[row.name]
        if df['richText'].iloc[row.name] in richTextFilter
        else dph.combineStrings(df, row),
        axis=1)
    dph.saveAsJson(df, 'press42Combined.json')

# Load the combined data.
if True:
    path = dph.getDataPath('press42Combined.json')
    df = pd.read_json(path)

# Remove stop words, punctuation, and whitespace.
if True:
    df['combClean'] = df.apply(
        lambda row: dph.removeStopPunctSpace(df['combText'].iloc[row.name]),
        axis=1)
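# Hedged sketch, not in the original snippet: 'allowedPostags' is defined above
# but unused in the visible code. A lemmatization step restricted to those POS
# tags, producing the 'combCleanLemma' column consumed downstream, would
# plausibly look like this; the output file name is illustrative.
def lemmatizeFiltered(text, allowed=allowedPostags):
    # Keep only the lemmas of tokens whose POS tag is in 'allowed'.
    return [token.lemma_ for token in nlp(text) if token.pos_ in allowed]

if True:
    df['combCleanLemma'] = df['combText'].apply(lemmatizeFiltered)
    dph.saveAsJson(df, 'press42LemmaExample.json')  # illustrative file name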