# Example 1
    def run(self):
        """Dispatch to the analysis routine selected by ``self.name``.

        On success, emits ``dataReady`` with the analysis results.  On any
        exception during the analysis (or the emit), emits
        ``errorEncountered`` with a ``"name:exception"`` message and returns.
        Raises ``UnLuckyException`` when ``self.name`` matches no known
        analysis.
        """
        kwargs = self.kwargs
        # Bind the chosen analysis as a zero-argument callable so the
        # try/emit/except scaffolding is written exactly once below.
        if self.name == 'functional_load':
            analysis = lambda: minpair_fl(kwargs['corpus'],
                                          kwargs['segment_pair'],
                                          stop_check=kwargs['stop_check'],
                                          call_back=kwargs['call_back'])
        elif self.name == 'string_similarity':
            analysis = lambda: string_similarity(kwargs['corpus'],
                                                 kwargs['query'],
                                                 kwargs['algorithm'],
                                                 stop_check=kwargs['stop_check'],
                                                 call_back=kwargs['call_back'])
        elif self.name == 'phonotactic_probability':
            analysis = lambda: phonotactic_probability_vitevitch(
                kwargs['corpus'],
                kwargs['query'],
                kwargs['sequence_type'],
                probability_type=kwargs['probability_type'],
                stop_check=kwargs['stop_check'],
                call_back=kwargs['call_back'])
        elif self.name == 'kullback_leibler':
            analysis = lambda: KullbackLeibler(kwargs['corpus'],
                                               kwargs['seg1'],
                                               kwargs['seg2'],
                                               kwargs['side'],
                                               stop_check=kwargs['stop_check'],
                                               call_back=kwargs['call_back'])
        else:
            raise UnLuckyException(
                'No analysis function called {} could be found'.format(
                    self.name))
        try:
            # kwargs lookups and the analysis both run inside the try, as
            # in the original per-branch handlers.
            self.dataReady.emit(analysis())
        except Exception as e:
            self.errorEncountered.emit('{}:{}'.format(self.name, e))
            return
# Example 2
 def run(self):
     """Run a Kullback-Leibler analysis over each requested segment pair.

     Wraps the corpus in the variant context selected by the ``context``
     kwarg, runs :func:`KullbackLeibler` per pair, and emits:

     * ``dataReady`` with the list of per-pair results on success,
     * ``errorEncountered`` with a PCT error on failure,
     * ``finishedCancelling`` if the user stopped the run.

     Raises:
         ValueError: if the ``context`` kwarg matches no known context type.
     """
     # Brief pause so the caller's progress dialog has a chance to appear.
     time.sleep(0.1)
     kwargs = self.kwargs
     self.results = []
     context = kwargs.pop('context')
     if context == ContextWidget.canonical_value:
         cm = CanonicalVariantContext
     elif context == ContextWidget.frequent_value:
         cm = MostFrequentVariantContext
     elif context == ContextWidget.separate_value:
         cm = SeparatedTokensVariantContext
     elif context == ContextWidget.relative_value:
         cm = WeightedVariantContext
     else:
         # Previously an unrecognized context fell through and crashed with
         # a bare NameError on ``cm`` below; fail with an explicit message.
         raise ValueError('Unknown context type: {!r}'.format(context))
     with cm(kwargs['corpus'],
             kwargs['sequence_type'],
             kwargs['type_token'],
             frequency_threshold=kwargs['frequency_cutoff']) as c:
         try:
             for pair in kwargs['segment_pairs']:
                 res = KullbackLeibler(c,
                                       pair[0],
                                       pair[1],
                                       outfile=None,
                                       side=kwargs['side'],
                                       stop_check=kwargs['stop_check'],
                                       call_back=kwargs['call_back'])
                 # Honor cancellation between pairs: discard the result
                 # computed after the stop was requested.
                 if self.stopped:
                     break
                 self.results.append(res)
         except PCTError as e:
             # Project errors are emitted as-is for the GUI to display.
             self.errorEncountered.emit(e)
             return
         except Exception as e:
             # Wrap unexpected errors so the GUI sees a uniform error type.
             e = PCTPythonError(e)
             self.errorEncountered.emit(e)
             return
     if self.stopped:
         self.finishedCancelling.emit()
         return
     self.dataReady.emit(self.results)
# Example 3
def main():
    """Command-line interface to the Kullback-Leibler analysis.

    Loads a corpus (from the default PCT corpus directory, falling back to
    a literal or cwd-relative path), wraps it in the requested variant
    context, runs :func:`KullbackLeibler` on the two segments, and either
    writes the results to ``--outfile`` or prints them to stdout.
    """
    #### Parse command-line arguments
    parser = argparse.ArgumentParser(
        description='Phonological CorpusTools: Kullback-Leibler CL interface')
    parser.add_argument(
        'corpus_file_name',
        help=
        'Path to corpus file. This can just be the file name if it\'s in the same directory as CorpusTools'
    )
    parser.add_argument('seg1', help='First segment')
    parser.add_argument('seg2', help='Second segment')
    parser.add_argument(
        'side',
        help=
        'Context to check. Options are \'right\', \'left\' and \'both\'. You can enter just the first letter.'
    )
    parser.add_argument(
        '-s',
        '--sequence_type',
        default='transcription',
        help=
        "The attribute of Words to calculate KL over. Normally this will be the transcription, but it can also be the spelling or a user-specified tier."
    )
    parser.add_argument(
        '-t',
        '--type_or_token',
        default='token',
        help='Specifies whether entropy is based on type or token frequency.')
    parser.add_argument(
        '-c',
        '--context_type',
        type=str,
        default='Canonical',
        help=
        "How to deal with variable pronunciations. Options are 'Canonical', 'MostFrequent', 'SeparatedTokens', or 'Weighted'. See documentation for details."
    )
    parser.add_argument('-o',
                        '--outfile',
                        help='Name of output file (optional)')

    args = parser.parse_args()

    ####

    # Try the default PCT corpus directory first; fall back to treating the
    # argument as a literal (or cwd-relative) path.
    try:
        home = os.path.expanduser('~')
        corpus = load_binary(
            os.path.join(home, 'Documents', 'PCT', 'CorpusTools', 'CORPUS',
                         args.corpus_file_name))
    except FileNotFoundError:
        corpus_path = args.corpus_file_name
        if not os.path.isfile(corpus_path):
            corpus_path = os.path.join(os.getcwd(), corpus_path)
        corpus = load_binary(corpus_path)

    # Wrap the corpus in the requested variant context.
    if args.context_type == 'Canonical':
        corpus = CanonicalVariantContext(corpus, args.sequence_type,
                                         args.type_or_token)
    elif args.context_type == 'MostFrequent':
        corpus = MostFrequentVariantContext(corpus, args.sequence_type,
                                            args.type_or_token)
    elif args.context_type == 'SeparatedTokens':
        corpus = SeparatedTokensVariantContext(corpus, args.sequence_type,
                                               args.type_or_token)
    elif args.context_type == 'Weighted':
        corpus = WeightedVariantContext(corpus, args.sequence_type,
                                        args.type_or_token)

    # Normalize the output path up front so it can be handed straight to
    # KullbackLeibler, which writes its own results file when given one.
    outfile = args.outfile
    if outfile is not None:
        if not os.path.isfile(outfile):
            outfile = os.path.join(os.getcwd(), outfile)
        if not outfile.endswith('.txt'):
            outfile += '.txt'

    # BUG FIX: the previous version called KullbackLeibler(outfile=None)
    # and then wrote the file itself using names that were never defined
    # (seg1, seg2, allC, freq_c, totalC), so every run with -o crashed
    # with a NameError.  Delegate file output to KullbackLeibler instead.
    results = KullbackLeibler(corpus,
                              args.seg1,
                              args.seg2,
                              args.side,
                              outfile=outfile)

    if outfile is not None:
        print('Done!')
    else:
        print(results)