    def __init__(self, posType="RAW"):
        self.l = Locations()
        self.mobypos = dict()

        self.posdef = {
            'N': 'Noun',
            'p': 'Plural',
            'h': 'Noun Phrase',
            'V': 'Verb (usu participle)',
            't': 'Verb (transitive)',
            'i': 'Verb (intransitive)',
            'A': 'Adjective',
            'v': 'Adverb',
            'C': 'Conjunction',
            'P': 'Preposition',
            '!': 'Interjection',
            'r': 'Pronoun',
            'D': 'Definite Article',
            'I': 'Indefinite Article',
            'o': 'Nominative',
            'N/A': 'N/A',
            'e': 'UNKNOWN'
        }
        self.loadRawPOS()
        if "CONDENSED" in posType:
            self.condensePOS()
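
    # A minimal usage sketch, assuming this class is MobyPOS (the name used
    # further down this page) and that self.mobypos maps a word to a string of
    # Moby POS codes after loadRawPOS(); this helper is illustrative, not part
    # of the original class.
    def describe_codes(self, word):
        """Translate a word's Moby POS code string into readable labels."""
        codes = self.mobypos.get(word, "")
        return [self.posdef.get(c, self.posdef['e']) for c in codes]

    # e.g. if self.mobypos['run'] were 'NtV', describe_codes('run') would give
    # ['Noun', 'Verb (transitive)', 'Verb (usu participle)']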
    def __init__(self, stressType, dictDirection="ORTH_TO_PHON"):
        self.l = Locations()
        self.myBASE = self.l.mybase
        self.stressType = stressType
        self.CMUDict = {}
        self.arpaToKlatt = {}
        self.loadArpaToKlatt()
        # print("loading CMU Pronunciation dict")
        self.translateDictToKlatt(dictDirection)
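
    # Sketch of how a per-phone ARPAbet-to-Klatt translation might be applied;
    # the mapping entries and the helper name are illustrative assumptions,
    # not the project's actual table or API.
    def _translate_pron(self, arpa_phones):
        """Map a list of ARPAbet phones onto Klatt symbols, one for one."""
        # e.g. self.arpaToKlatt might hold entries such as {'IY': 'i', 'CH': 'C'}
        return [self.arpaToKlatt.get(p, p) for p in arpa_phones]

    # under that illustrative mapping, _translate_pron(['CH', 'IY', 'Z'])
    # returns ['C', 'i', 'Z'] (unmapped phones fall through unchanged)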
    def __init__(self):
        """

        """
        self.l = Locations()
        self.phon = PhonologicalFromOrthographic()
        self.mobypos = MobyPOS(self.l.mybase)
        self.mremove = ['Conjunction', 'Interjection', 'Pronoun',
                        'Preposition', 'Definite Article', 'Indefinite Article']
        self.elppos = EnglishLexiconProjectPOS()
        self.closed = ClosedClass()

        self.keep_as_is = set()
        self.remove_not_representative = set()
        self.keep_to_root = set()
        self.inflect_to_root = dict()
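
    # Illustrative sketch of how the decision containers above might be
    # filled; the rule order, the helper name, and the assumption that
    # ClosedClass exposes a `closedclass` set (as in the last snippet on this
    # page) are all guesses, not the project's actual logic.
    def _record_decision(self, word, root=None):
        if root is not None and root != word:
            # inflected form mapped back to its root, e.g. 'dogs' -> 'dog'
            self.inflect_to_root[word] = root
        elif word in self.closed.closedclass:
            # closed-class (function) words are not representative content words
            self.remove_not_representative.add(word)
        else:
            self.keep_as_is.add(word)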
    def __init__(self):
        # project specific file locations and variable names
        self.l = Locations()
        self.cv = CreateVariables()
        self.cv.loadExperimentalVars()
        self.r = ProxyAcqConvTrscr()

        # all child words
        self.c3 = self.cv.threeVars
        self.c4 = self.cv.fourVars
        self.c6 = self.cv.sixVars

        # all adult words
        self.a3 = self.cv.threeAdultVars
        self.a4 = self.cv.fourAdultVars
        self.a6 = self.cv.sixAdultVars

        # all child with adjusted use
        self.c3a = self.cv.threeVars[
            self.cv.threeVars[self.cv.token_adult] > 0]
        self.c4a = self.cv.fourVars[self.cv.fourVars[self.cv.token_adult] > 0]
        self.c6a = self.cv.sixVars[self.cv.sixVars[self.cv.token_adult] > 0]

        # child multisyllabic words and vars - all
        self.m3a = self.cv.threeVars[self.cv.threeMultiAll]
        self.m4a = self.cv.fourVars[self.cv.fourMultiAll]
        self.m6a = self.cv.sixVars[self.cv.sixMultiAll]

        # child multisyllabic words and vars - adult token > 0
        self.m3 = self.cv.threeVars[self.cv.threeMulti]
        self.m4 = self.cv.fourVars[self.cv.fourMulti]
        self.m6 = self.cv.sixVars[self.cv.sixMulti]

        # child CVC words and vars - adult token > 0
        self.cvc3 = self.cv.threeVars[self.cv.threeCVC]
        self.cvc4 = self.cv.fourVars[self.cv.fourCVC]
        self.cvc6 = self.cv.sixVars[self.cv.sixCVC]

        self.all_word_stats()
        self.positive_control_stats()
        self.multisyllabic_stats()
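
# The "adjusted use" subsets above rely on pandas boolean indexing:
# df[df[col] > 0] keeps only the rows with a positive value in `col`.
# A self-contained toy example of the same pattern (the data are made up):
import pandas as pd

toy = pd.DataFrame({'word': ['ball', 'dog', 'zzz'],
                    'token_adult': [12, 3, 0]})
adjusted = toy[toy['token_adult'] > 0]  # drops the row where token_adult == 0
print(adjusted)
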
    def __init__(self):

        # project specific file locations and variable names
        self.l = Locations()
        self.findShape = Syllabifier('STRESS')
        self.sim = SONSimilarity([])
        self.fau = ProxyAcqConvTrscr()

        # labels for columns in pandas
        self.orthographic = 'orthographic'
        self.phonological = 'phonological'
        self.syllables = 'syllables'
        self.onset_nucleus = 'onset_nucleus'
        self.onset_nucleus_coda = 'onset_nucleus_coda'

        # indexes for numeric variables
        self.length_syllables = 'length_syllables'
        self.length_phonemes = 'length_phonemes'
        self.str_pos = 'stressed_syll_position'
        self.pct_child = 'percent_child'
        self.pct_adult = 'percent_adult'
        self.token_child = 'token_child'
        self.token_adult = 'token_adult'

        # SON and PHON neighborhood density
        self.phon_n_density = 'SAD_density'
        self.onset_nucleus_density = 'onset_nucleus_density'
        self.onset_nucleus_coda_density = 'onset_nucleus_coda_density'

        # PHON neighborhood frequency
        self.son_frequency = 'SON_frequency'
        self.sad_frequency_pct_raw = 'SAD_frequency_pct_child_raw'

        # different ways of creating PACT values
        self.fau_pct_tok_p1 = 'fau_poly1'
        self.fau_pct_tok_p2 = 'fau_poly2'
        self.fau_tok_tok_p1 = 'fau_token_poly1'
        self.fau_tok_tok_p2 = 'fau_token_poly2'
        self.fau_pct_pct_p1 = 'fau_pct_poly1'
        self.fau_pct_pct_p2 = 'fau_pct_poly2'
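
# The *_poly1 / *_poly2 suffixes on the PACT labels above appear to mark
# degree-1 versus degree-2 polynomial fits. A hedged sketch of that
# distinction using numpy.polyfit on made-up numbers; the data, variable
# names, and the reading of the suffixes are assumptions.
import numpy as np

child_pct = np.array([0.1, 0.4, 0.5, 0.9])
adult_pct = np.array([0.2, 0.3, 0.6, 0.8])
linear_fit = np.polyfit(adult_pct, child_pct, 1)     # "poly1": slope and intercept
quadratic_fit = np.polyfit(adult_pct, child_pct, 2)  # "poly2": three coefficients
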
    def __init__(self):
        """ Initializes needed modules, and creates child and adult lexicons"""

        # project specific file locations and variable names
        self.loc = Locations()
        self.we = WordEvaluation()
        self.we.load_evaluated_words()
        self.phon = PhonologicalFromOrthographic()

        # number of transcripts at each age
        self.n3 = 747
        self.n4 = 683
        self.n6 = 696

        # creating child lexicons from RWL and word evaluation
        self.child3 = self.apply_word_evaluation(self.loc.threeChildSALT)
        self.child4 = self.apply_word_evaluation(self.loc.fourChildSALT)
        self.child6 = self.apply_word_evaluation(self.loc.sixChildSALT)

        # creating adult lexicons from RWL and word evaluation
        self.adult3 = self.apply_word_evaluation(self.loc.threeAdultSALT)
        self.adult4 = self.apply_word_evaluation(self.loc.fourAdultSALT)
        self.adult6 = self.apply_word_evaluation(self.loc.sixAdultSALT)
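
    # Hedged sketch: the transcript counts above (n3, n4, n6) could serve as
    # denominators when comparing lexicon sizes across ages. This helper and
    # the assumption that apply_word_evaluation returns a sized collection are
    # illustrative, not part of the original class.
    def _entries_per_transcript(self):
        return {3: len(self.child3) / float(self.n3),
                4: len(self.child4) / float(self.n4),
                6: len(self.child6) / float(self.n6)}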
    def __init__(self):
        # project specific file locations and variable names
        self.l = Locations()
        self.cv = CreateVariables()
        self.cv.loadExperimentalVars()
        self.bootstrap_num = 1000
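
    # Sketch of the resampling loop that self.bootstrap_num implies: draw
    # bootstrap_num samples with replacement and collect a statistic. The
    # helper name, its argument, and the choice of the mean are illustrative
    # assumptions, not the project's analysis.
    def _bootstrap_means(self, values):
        import random  # kept local so the sketch stays self-contained
        means = []
        for _ in range(self.bootstrap_num):
            resample = [random.choice(values) for _ in values]
            means.append(sum(resample) / float(len(resample)))
        return means  # sort and take the 2.5th/97.5th percentiles for a 95% CI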
Example #8
    def __init__(self):
        self.l = Locations()
        self.location = self.l.english_lexicon_project
        self.elp_pos = dict()
        self.pos_map = self.load_pos_map()
        self.loadPOSFromFile()
Example #9
    def orthographic_from_rwl(self, rwl_file):
        # signature follows the __main__ calls below; orth_words is assumed to
        # be a nested dict (hence the defaultdict) so the column assignments work
        from collections import defaultdict
        orth_words = defaultdict(dict)

        f = open(rwl_file, "r")
        f.readline()  # skip the header row
        for line in f.readlines():
            wordInfo = line.strip()
            wordInfo = wordInfo.split(",")
            # column 3 holds the word's token count
            orth_words[wordInfo[0].lower()]['TOKEN'] = wordInfo[3]
            # column 1 holds the number-of-children count
            orth_words[wordInfo[0].lower()]['NUMCHILD'] = wordInfo[1]
        f.close()

        return orth_words


if __name__ == "__main__":
    TEST = RootWordList()
    L = Locations()
    print('entries from three adult RWL',
          len(TEST.orthographic_from_rwl(L.threeAdultSALT)))
    print('entries from three child RWL',
          len(TEST.orthographic_from_rwl(L.threeChildSALT)))
    print('entries from four adult RWL',
          len(TEST.orthographic_from_rwl(L.fourAdultSALT)))
    print('entries from four child RWL',
          len(TEST.orthographic_from_rwl(L.fourChildSALT)))
    print('entries from six adult RWL',
          len(TEST.orthographic_from_rwl(L.sixAdultSALT)))
    print('entries from six child RWL',
          len(TEST.orthographic_from_rwl(L.sixChildSALT)))
Example #10
    def __init__(self):
        self.l = Locations()
        self.myBASE = self.l.mybase
        self.closedclass = set()
        self.loadClosedClass()
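
    # Sketch of what loadClosedClass might do and how the resulting set is
    # used; the Locations attribute name, the one-word-per-line file format,
    # and the usage note are assumptions, not the project's actual code.
    def loadClosedClass(self):
        with open(self.l.closed_class_file, "r") as f:  # hypothetical attribute
            for line in f:
                word = line.strip().lower()
                if word:
                    self.closedclass.add(word)

    # typical use: membership tests such as `word in self.closedclass` to
    # filter function words (articles, prepositions, pronouns, ...) out of a
    # lexicon before analysis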