import matplotlib.pyplot as plt
from nltk import FreqDist
from nltk.tokenize import RegexpTokenizer


def paper_title_NLP(title_corpus):

    # title_corpus is a list of tuples: t[1] is the paper title,
    # t[3] and t[4] are the two-digit year and month
    # title_dict maps (year, month) keys, e.g. (19, 1) for 2019/01,
    # to lists of tokenized titles
    # reference: https://stackoverflow.com/questions/36353125/nltk-regular-expression-tokenizer
    title_dict = {}
    pattern = r'''(?x)            # set flag to allow verbose regexps
            (?:[A-Z]\.)+          # abbreviations, e.g. U.S.A.
            | \w+(?:-\w+)*        # words with optional internal hyphens
            | \$?\d+(?:\.\d+)?%?  # currency and percentages, e.g. $12.40, 82%
            | \.\.\.              # ellipsis
            | [][.,;"'?():_`-]    # these are separate tokens; includes ], [
            '''
    tokenizer = RegexpTokenizer(pattern)
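    # Illustrative example (sentence made up) of what this tokenizer produces:
    # tokenizer.tokenize('U.S.A. spends $12.40 on state-of-the-art NLP')
    #   -> ['U.S.A.', 'spends', '$12.40', 'on', 'state-of-the-art', 'NLP']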
    for t in title_corpus:
        key = (t[3], t[4])
        if key not in title_dict:
            title_dict[key] = []
        filtered_text = tokenizer.tokenize(t[1])
        title_dict[key].append(filtered_text)

    # regroup the per-month token lists by year so keyword
    # frequencies can be computed over whole years
    title_years = {}
    for k, v in title_dict.items():
        key = (k[0],)  # year only
        if key not in title_years:
            title_years[key] = []
        title_years[key].append(v)

    deep_freq = []
    for k, v in sorted(title_years.items()):  # iterate in chronological order
        fd = FreqDist()
        # v is a list of per-month lists; flatten one level to get the tokenized titles
        tokenized_titles = [item for sublist in v for item in sublist]
        for tokens in tokenized_titles:
            for word in tokens:
                fd[word] += 1

        print('The keywords for year 20{}:'.format(k[0]))
        print('Total number of words: {}'.format(fd.N()))  # total number of samples
        print('Total number of unique words: {}'.format(fd.B()))  # number of bins (unique samples)
        fd.pprint(50)  # maximum number of items to display, default is 10
        # relative frequency of the word "deep" (either capitalisation) in this year's titles
        deep_freq.append(fd.freq('Deep') + fd.freq('deep'))
        print(deep_freq)

    # derive the x-axis years from the two-digit year keys (e.g. 12 -> 2012)
    # so the x values always line up with deep_freq
    years = [2000 + k[0] for k in sorted(title_years)]
    plt.plot(years, deep_freq)
    plt.ylabel('frequency of the word "deep"')
    plt.xlabel('year')
    plt.show()
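

# Minimal usage sketch: the rows below are made-up examples, but they follow the
# tuple layout assumed above (t[1] = title, t[3]/t[4] = two-digit year and month);
# requires nltk and matplotlib to be installed.
if __name__ == '__main__':
    sample_corpus = [
        (0, 'Deep Learning for Image Recognition', None, 12, 3),
        (1, 'A Survey of Kernel Methods', None, 12, 7),
        (2, 'Going Deeper with Convolutions and Deep Features', None, 15, 6),
        (3, 'Deep Reinforcement Learning for Games', None, 18, 1),
    ]
    paper_title_NLP(sample_corpus)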