for para in doc.paragraphs:
                text += para.text
            #summary=model(text)
            corpus.append(text)  #### stop and stem not applied here
            #summarizedtext.append(summary)
            filenameraw.append([os.path.split(doc)[1].split('.')[0]])

    except:
        print(doc)

#*********************************************************************************

#****************creating tagged corpus
# Each document in `corpus` is split on whitespace (no stop-word removal or
# stemming) and tagged with its position in `corpus`, so tag i always refers
# to corpus[i]. A comprehension is used so the module-level names `doc` and
# `idx` are not rebound here.
tagged_cr = [
    TaggedDocument(words=text.split(), tags=[position])
    for position, text in enumerate(corpus)
]

#*********model prep
# Configure an untrained Doc2Vec model: 100-dimensional vectors, words kept
# from a frequency of 1 upward, 80 training epochs, initial learning rate 0.025.
model = gensim.models.doc2vec.Doc2Vec(
    alpha=0.025,
    epochs=80,
    min_count=1,
    vector_size=100,
)

# The vocabulary has to be built from the tagged corpus before training.
model.build_vocab(tagged_cr)

# Train on the same corpus, reusing the document count and epoch setting that
# the model itself recorded.
model.train(
    tagged_cr,
    epochs=model.epochs,
    total_examples=model.corpus_count,
)

#vector = model.infer_vector(['dancing', 'reading', 'theatre', 'machine', 'learning', 'skills'])
#print(vector)

##Overfitting Test
# Empty accumulators for the overfitting check.
# NOTE(review): presumably filled with per-document similarity ranks by the
# code that follows — confirm.
ranks = []
second_ranks = []