Python DocumentGenerator.paragraph Beispiele, essential_generators.DocumentGenerator.paragraph Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: mentain_accounts.py Projekt: Stephanehk/Auto_Reviewer

def send_email():
    """Send one randomly generated e-mail between two stored accounts.

    Reads ``generated_accounts.txt`` (JSON with an ``"accounts"`` list whose
    entries carry ``"username"`` and ``"password"``), picks a random sender
    and a random receiver, and sends a generated subject and body through
    Yandex's SMTP-over-SSL endpoint.  Sender and receiver are drawn
    independently, so they can be the same account.
    """
    # Choose a random sender and receiver.
    with open('generated_accounts.txt') as json_file:
        data = json.load(json_file)
    sender = random.choice(data["accounts"])
    sender_user = sender["username"] + "@yandex.com"
    sender_password = sender["password"]

    receiver_user = random.choice(data["accounts"])["username"] + "@yandex.com"

    # Generate a random body paragraph and subject, then build the message.
    gen = DocumentGenerator()
    body = gen.paragraph()
    msg = MIMEText(body)
    msg["Subject"] = gen.sentence()
    msg["From"] = sender_user
    msg["To"] = receiver_user

    # The context manager issues QUIT and closes the socket even when login()
    # or sendmail() raises, so a failed send no longer leaks the connection.
    with smtplib.SMTP_SSL('smtp.yandex.ru', 465) as s:
        # Introduce ourselves to the server.  No STARTTLS is needed because
        # SMTP_SSL already runs the whole session over TLS.
        s.ehlo()

        s.set_debuglevel(1)
        s.login(sender_user, sender_password)
        s.sendmail(sender_user, [receiver_user], msg.as_string())
    print("sent mail")

Beispiel #2

0

Datei anzeigen

 def handle(self, *args, **options):
     """Seed tags, users, profile bios and articles with generated text.

     Creates every ``(tag, slug)`` pair from the module-level ``tags``
     sequence, then ``options["num_users"]`` users that all share
     ``self.password``.  Each user gets a one-sentence bio and between 1 and
     ``self.article_upper_bound - 1`` articles, and every article receives one
     randomly chosen tag.
     """
     tag_rows = [Tag(tag=entry[0], slug=entry[1]) for entry in tags]
     Tag.objects.bulk_create(tag_rows, ignore_conflicts=True)

     names = generate_username(int(options["num_users"]))
     User = get_user_model()
     users = []
     for name in names:
         users.append(
             User.objects.create_user(username=name, password=self.password))
     print(users)

     gen = DocumentGenerator()
     gen.init_word_cache(5000)
     gen.init_sentence_cache(5000)

     for created in users:
         # Re-read the user from the database before touching its profile.
         account = User.objects.get(username=created.username)
         account.profile.bio = gen.sentence()
         account.profile.save()

         # randrange starts at 1, so every user has at least one article.
         article_count = random.randrange(1, self.article_upper_bound)
         drafts = []
         for _ in range(article_count):
             drafts.append(
                 Article(
                     slug=make_slug(gen.sentence()),
                     title=gen.sentence(),
                     description=gen.sentence(),
                     body=gen.paragraph(),
                     author=account.profile,
                 ))
         articles = Article.objects.bulk_create(drafts)
         print(articles)

         # Exactly one tag per article for now; more could be added later.
         for article in articles:
             chosen_slug = random.choice(tags)[1]
             article.tags.add(Tag.objects.get(slug=chosen_slug))

     self.stdout.write(self.style.SUCCESS(f"Created {len(users)} users"))

Beispiel #3

0

Datei anzeigen

Datei: populatedb.py Projekt: gurupratap-matharu/code-atlas

    def handle(self, *args, **options):
        """Create 10000 users, each owning 20 to 100 generated notes.

        Users get the e-mail address ``<n>@email.com`` where ``n`` counts up
        from 1.  Every note has a generated sentence as title and a generated
        paragraph as content.  A progress line is rewritten in place after
        each user, and a success message is written once all users exist.

        The previous implementation looped with ``while True`` and never
        broke out, so the command never finished and the success message was
        unreachable; the loop is now bounded by the total that the progress
        line already advertised.
        """
        gen = DocumentGenerator()
        total_users = 10000  # the total shown in the progress line

        for user_count in range(1, total_users + 1):
            user = User.objects.create_user(
                email=f'{user_count}@email.com')
            num_notes = random.randint(20, 100)

            for _ in range(num_notes):
                note = Note.objects.create(user=user,
                                           title=gen.sentence(),
                                           content=gen.paragraph())
                # NOTE(review): as in the original, this assignment is not
                # followed by a save(); confirm whether the tag is meant to
                # be persisted and how the ``tags`` field expects its value.
                note.tags = gen.word()

            print(f'Created Notes for User [{user_count}/{total_users}]',
                  end='\r')

        self.stdout.write(self.style.SUCCESS('Successfully loaded data'))

Beispiel #4

0

Datei anzeigen

class TestCaseGenerator:
    """Produce random numbers, words, sentences and paragraphs for test cases.

    Args:
        sample_letters: Alphabet used for "useless" (random-character) words.
            When ``None`` it is chosen lazily from the ``enabled_*`` flags.
        enabled_repetition: When true, ``generate_random_useless_word``
            returns a shuffle of the whole alphabet instead of a sample.
        enabled_alphanumeric: Default alphabet includes digits.
        enabled_special_character: Default alphabet includes digits and
            punctuation (takes precedence over ``enabled_alphanumeric``).
    """

    def __init__(
        self,
        sample_letters=None,
        enabled_repetition=False,
        enabled_alphanumeric=False,
        enabled_special_character=False,
    ):
        self.gen = DocumentGenerator()
        self.sample_letters = sample_letters
        self.enabled_repetition = enabled_repetition
        self.enabled_alphanumeric = enabled_alphanumeric
        self.enabled_special_character = enabled_special_character

    def parametric_test_case_init(self):
        """Fill in ``sample_letters`` from the flags if none was supplied."""
        if self.sample_letters is not None:
            return
        if self.enabled_special_character:
            self.sample_letters = (string.ascii_letters + string.digits +
                                   string.punctuation)
        elif self.enabled_alphanumeric:
            self.sample_letters = string.ascii_letters + string.digits
        else:
            self.sample_letters = string.ascii_letters

    def generate_random_number(self, digits=None):
        """Return a random integer with exactly ``digits`` decimal digits.

        Raises:
            TypeError: If ``digits`` is omitted (``None``).
            ValueError: If ``digits`` is smaller than 1.
        """
        if digits is None:
            raise TypeError("digits must be an integer, not None")
        if digits < 1:
            raise ValueError("digits must be at least 1")
        range_start = 10**(digits - 1)
        range_end = (10**digits) - 1
        return random.randint(range_start, range_end)

    def generate_random_useful_word(self, length=None):
        """Return one generated dictionary-like word; ``length`` is ignored."""
        return self.gen.word()

    def generate_random_useless_word(self, length=None):
        """Return a random string built from ``sample_letters``.

        With ``enabled_repetition`` set, the result is a shuffle of the whole
        alphabet and ``length`` is ignored.  Otherwise ``length`` distinct
        positions are sampled from the alphabet, so ``length`` is required
        and may not exceed the alphabet size.
        """
        # Resolve the alphabet for BOTH branches; previously the sampling
        # branch ran with sample_letters still None and raised TypeError.
        self.parametric_test_case_init()
        if self.enabled_repetition:
            # NOTE(review): despite the flag name this branch produces no
            # repeated characters; confirm the intended meaning.
            sample_list = list(self.sample_letters)
            random.shuffle(sample_list)
            return "".join(sample_list)
        return "".join(random.sample(self.sample_letters, length))

    def generate_random_sentence(self):
        """Return one generated sentence."""
        return self.gen.sentence()

    def generate_random_paragraph(self):
        """Return one generated paragraph."""
        return self.gen.paragraph()

    def generate_random_choice_from_list(self, col, choices):
        """Return ``choices`` distinct random elements of ``col`` as a list."""
        return random.sample(col, choices)

Beispiel #5

0

Datei anzeigen

Datei: gen.py Projekt: god-s-perfect-idiot/github-activity-booster

def gen_file():
    """Write a generated paragraph to a new file and return its name.

    The base name comes from ``gen()`` and the extension is picked at random
    from a fixed list of source-code suffixes.  The file is created in the
    current working directory.
    """
    paragraph_text = DocumentGenerator().paragraph()
    extensions = ['py', 'java', 'c', 'cs', 'js', 'sh']
    extension = extensions[random.randint(0, 5)]
    name = gen() + "." + extension

    with open(name, 'w') as out_file:
        out_file.write(paragraph_text)
    return name

Beispiel #6

0

Datei anzeigen

class Solution:
    """Replace spaces with ``%20`` by hand and check it against ``str.replace``."""

    def __init__(self):
        self.gen = DocumentGenerator()

    def my_code(self, sentence):
        """Return ``sentence`` with every space character replaced by ``%20``."""
        # A single join avoids the quadratic cost of growing a string
        # one character at a time.
        return "".join("%20" if char == " " else char for char in sentence)

    def ideal_code(self, sentence):
        """Reference implementation based on ``str.replace``."""
        return sentence.replace(" ", "%20")

    def test_my_code(self):
        """Run 15 generated paragraphs through both implementations.

        Prints each test case, whether it passed, and a final summary with
        the pass/fail counts and the elapsed wall-clock time.
        """
        start_time = time.time()
        test_case = 15
        successful_count = 0
        failed_count = 0
        for iteration in range(1, test_case + 1):
            print(
                "_______________________________________________________________________________________________"
            )
            try:
                test_sentence = self.gen.paragraph()
                print(f"Executing test case {iteration}: {test_sentence}")
                my_result = self.my_code(test_sentence)
                ideal_result = self.ideal_code(test_sentence)
                assert my_result == ideal_result
                successful_count += 1
                print(f"\nTest case {iteration} passed ! ✔️")
            except (AssertionError, IndexError):
                failed_count += 1
                print(f"Test case {iteration} failed ! ❌")
                # print_exc() writes the traceback itself and returns None;
                # wrapping it in print() used to emit a stray "None" line.
                traceback.print_exc()

        print(
            "====================================================================================================="
        )
        print(f"Total Test Cases Run : {test_case}")
        print(f"Total Test Cases Successful ✔️: {successful_count}")
        print(f"Total Test Cases Failed ❌: {failed_count}")
        print(
            f"Time of Execution of {test_case} test cases : {time.time() - start_time}"
        )

Beispiel #7

0

Datei anzeigen

Datei: generate_text_v1.py Projekt: reekithak/English-Proficiency-Tester-Modules

def generate_to_read():
    """Return a cleaned, lower-cased paragraph for the person to read aloud.

    Paragraphs are generated until one has at least 400 characters; that
    paragraph is then normalised with ``cleantext.clean`` and returned.
    """
    from essential_generators import DocumentGenerator
    from cleantext import clean

    gen = DocumentGenerator()
    text = gen.paragraph()
    # Keep drawing until the paragraph is long enough.
    while len(text) < 400:
        text = gen.paragraph()

    clean_options = {
        "fix_unicode": True,  # fix various unicode errors
        "to_ascii": True,  # transliterate to closest ASCII representation
        "lower": True,  # lowercase text
        "no_line_breaks": False,  # only normalise line breaks, do not strip them
        "no_urls": False,  # keep URLs instead of replacing them with a token
        "no_emails": False,  # keep e-mail addresses
        "no_phone_numbers": False,  # keep phone numbers
        "no_numbers": False,  # keep numbers
        "no_digits": False,  # keep digits
        "no_currency_symbols": False,  # keep currency symbols
        "no_punct": False,  # keep punctuation
        "replace_with_punct": "",  # replacement used if punctuation were removed
        "replace_with_url": "<URL>",
        "replace_with_email": "<EMAIL>",
        "replace_with_phone_number": "<PHONE>",
        "replace_with_number": "<NUMBER>",
        "replace_with_digit": "0",
        "replace_with_currency_symbol": "<CUR>",
        "lang": "en",  # set to 'de' for German special handling
    }
    return clean(text, **clean_options)

Beispiel #8

0

Datei anzeigen

Datei: create_events.py Projekt: mdgoodrum/comethru

    def handle(self, *args, **options):
        """Create ``options['count']`` events filled with generated text.

        Each event gets a random organizer and venue, the current time for
        its publish/start/end timestamps, a random ``twenty_one`` flag, and
        up to five randomly sampled tags.
        """
        gen = DocumentGenerator()

        for _ in range(options['count']):
            event = Event.objects.create(
                title=gen.sentence(),
                date_published=timezone.now(),
                short_description=gen.sentence(),
                description=gen.paragraph(),
                twenty_one=random.choice([True, False]),
                organizer=User.objects.order_by("?").first(),
                venue=Venue.objects.order_by("?").first(),
                start_time=timezone.now(),
                end_time=timezone.now(),
            )

            # Sample at most five tag ids, or all of them when fewer exist.
            available_ids = list(Tag.objects.values_list('id', flat=True))
            sample_size = min(len(available_ids), 5)
            event.tags.set(random.sample(available_ids, sample_size))
            event.save()

Beispiel #9

0

Datei anzeigen

def generate_email_sample(query, output):
    """Append the tokens of one synthetic "email" command sample to ``output``.

    For each of the four arguments (recipients, subject, body, when) an
    ``"ARG"`` marker is appended, followed by the generated value(s);
    multiple recipients are separated by ``"AND"`` tokens.

    NOTE(review): the visible body never reads ``query``, never uses
    ``include``, ``recipients`` or ``inputs`` afterwards and returns nothing,
    so this snippet is probably truncated; confirm against the full source.
    """
    gen = DocumentGenerator()
    email_args = ["--recipients", "--subject", "--body", "--when"]
    # NOTE(review): np.random.choice(1, k) samples from arange(1) == [0], so
    # this array is always all zeros; a 0/1 mask would need choice(2, k).
    include = np.random.choice(1, len(email_args))
    include[0] = 1  # the recipients argument is always included
    recipients = []
    subject = ""
    body = ""
    when = ""

    # Recipients
    output.append("ARG")
    num_recipients = random.randint(1, 5)
    for i in range(0, num_recipients):
        # TODO(alexander): maybe not only provide name of random people
        entry = ""
        # Coin flip between a generated e-mail address and a full name.
        if random.randint(0, 1) == 0: entry = gen.email()
        else: entry = names.get_full_name()
        recipients.append(entry)
        output.append(entry)
        # Separate consecutive recipients, but add no trailing separator.
        if i < num_recipients - 1:
            output.append("AND")

    # Email subject
    output.append("ARG")
    subject = gen.sentence()
    output.append(subject)

    # Email body
    output.append("ARG")
    body = gen.paragraph()
    output.append(body)

    # Generate the "when" timestamp (current local time)
    output.append("ARG")
    now = datetime.datetime.now()
    when = now.strftime("%Y-%m-%d %H:%M:%S")
    output.append(when)

    inputs = " ".join(["email"])

Beispiel #10

0

Datei anzeigen

def create_random_interactions(request):
    """Randomly create posts, likes and comments, then return an empty JSON list.

    Every user has a 1-in-15 chance of publishing a generated paragraph as a
    post.  Afterwards each user walks the posts of everyone they follow and,
    per post, likes it with a 1-in-12 chance and comments on it (with a
    generated sentence) with a 1-in-17 chance.
    """
    users = MyUser.objects.all()
    gen = DocumentGenerator()

    # Phase 1: occasional new posts.
    for author in users:
        if random.randint(1, 15) == 1:
            paragraph = gen.paragraph()
            Post.objects.create(user=author, text_content=paragraph)

    # Phase 2: likes and comments on posts from followed users.
    for follower in users:
        for followed in follower.followings.all():
            for post in Post.objects.filter(user=followed):
                if random.randint(1, 12) == 1:
                    post.liked_by.add(follower)

                if random.randint(1, 17) == 1:
                    Comment.objects.create(post=post,
                                           user=follower,
                                           content=gen.sentence())

    return JsonResponse([], safe=False)

Beispiel #11

0

Datei anzeigen

def test_translate_generated_paragraph():
    """Translating a generated English paragraph to German yields more than one character."""
    source_text = DocumentGenerator().paragraph()
    translated = translate("EN", "DE", source_text)
    assert len(translated) > 1

Beispiel #12

0

Datei anzeigen

Datei: main.py Projekt: sayandafadar/Typing-speed-app

from tkinter import *
from tkinter import messagebox
from essential_generators import DocumentGenerator
import sys

# ---------------------------- CONSTANTS ------------------------------- #

# Shared text generator and the current practice paragraph, which is also
# echoed to stdout at start-up.
gen = DocumentGenerator()
new_paragraph = gen.paragraph(min_sentences=10, max_sentences=16)
print(new_paragraph)

# Hex colour strings and the font family name.
PINK = "#e2979c"
RED = "#e7305b"
GREEN = "#9bdeac"
YELLOW = "#f7f5dd"
FONT_NAME = "Courier"
# Mutable module-level state.
typed_word = 0
accurate_words = None


# ---------------------------- TIMER RESET ------------------------------- #
def paragraph():
    """Generate a fresh paragraph, store it globally and show it in ``text_samp``."""
    global new_paragraph
    fresh_text = gen.paragraph(min_sentences=10, max_sentences=16)
    new_paragraph = fresh_text
    text_samp.config(text=fresh_text)


def inputed_words():
    # Splits the current paragraph into words and reads the user's typed text
    # from the ``input_text`` widget.
    # NOTE(review): the visible body stops here -- ``accurate_words`` is
    # declared global but never assigned and nothing is returned, so the
    # snippet is presumably truncated; confirm against the full source.
    global accurate_words
    t = new_paragraph.split()
    words = input_text.get()

Beispiel #13

0

Datei anzeigen

Datei: create_pickle_and_text_files.py Projekt: thanusiv/Open-Source-License-Validator

def _write_fasttext_file(path, *splits):
    """Write every (texts, labels) split to *path* as ``<text> __label__<label>`` lines."""
    with open(path, 'w', encoding='utf-8') as text_file:
        for texts, labels in splits:
            for text, label in zip(texts, labels):
                text_file.write(text + ' __label__' + str(label) + '\n')


def _dump_pickle(path, obj):
    """Pickle *obj* into the file at *path*."""
    with open(path, 'wb') as output:
        pickle.dump(obj, output)


def main():
    """
    Create the pickle files used by all the models and the text files for fastText.

    Steps:
      1. Load ``data_java.csv`` and ``open_source_licenses.csv`` and label
         the license rows (0 for ``INVALID``, 1 otherwise).
      2. Generate 1000 negative comment blocks sprinkled with license
         keywords, so that a keyword alone does not imply a license.
      3. Combine, de-duplicate and oversample the positive class.
      4. Preprocess the text with the Preprocessor class.
      5. Split 60% train / 20% validation / 20% test.
      6. Write the fastText text files and the TF-IDF pickles.

    The function changes the working directory several times using relative
    paths, so it depends on the directory it is started from.
    """
    os.chdir('../../all_files_generated/csv_files')
    current_dir = os.getcwd()
    data_java_csv_path = os.path.join(current_dir, 'data_java.csv')
    open_source_licenses_path = os.path.join(current_dir,
                                             'open_source_licenses.csv')

    data_java_df = pd.read_csv(data_java_csv_path)
    data_java_df.drop_duplicates(inplace=True)

    open_source_df = pd.read_csv(open_source_licenses_path)
    open_source_df.drop_duplicates(inplace=True)
    open_source_df['label'] = open_source_df['license_type'].apply(
        lambda x: 0 if x == 'INVALID' else 1)
    open_source_df = open_source_df[['comment_block_text', 'label']]

    # generate some new comment blocks that contain some ninka text so that the model can differentiate that just
    # because a specific word is in the comment doesn't mean it contains a license
    gen = DocumentGenerator()
    number_of_generated_blocks = 1000
    generated_data = [
        gen.paragraph(min_sentences=1)
        for _ in range(number_of_generated_blocks)
    ]
    generated_data_labels = [0] * number_of_generated_blocks

    # NOTE(review): the working directory was changed above, so this lookup
    # is only reliable when __file__ is an absolute path.
    with open(os.path.dirname(__file__) + '/words_for_generation.dict',
              'r') as f:
        # Skip comment lines and blank lines.  Blank lines used to survive
        # the filter (they still contained the newline) and became empty
        # keywords.
        license_keywords = [
            line.strip().lower() for line in f
            if line.strip() and not line.startswith("#")
        ]

    updated_generated_data = []
    for x in generated_data:
        number_of_select_words = random.randint(0, 15)
        select_words = [
            random.choice(license_keywords)
            for _ in range(number_of_select_words)
        ]

        # Splice each keyword in at a random character position.
        for word in select_words:
            insertion_index = random.randint(0, len(x) - 1)
            x = x[:insertion_index] + ' ' + word + ' ' + x[insertion_index:]

        updated_generated_data.append(x)

    generated_data_df = pd.DataFrame({
        'comment_block_text': updated_generated_data,
        'label': generated_data_labels
    })
    combined_df = pd.concat([data_java_df, open_source_df, generated_data_df],
                            ignore_index=True).drop_duplicates(keep='first')
    # Oversample the positive class; comment this out if you do not wish to
    # have oversampling.  pd.concat replaces DataFrame.append, which was
    # removed in pandas 2.0.  sample(n=3000) requires at least 3000 positive
    # rows.
    combined_df = pd.concat(
        [combined_df, combined_df[combined_df.label == 1].sample(n=3000)])

    print('Number of Invalid Comment Blocks (0):',
          str(len(combined_df[combined_df.label == 0])))
    print('number of Valid Comment Blocks (1):',
          str(len(combined_df[combined_df.label == 1])))

    preprocessor = Preprocessor()
    print('Start preprocessing ...')
    combined_df['clean_text'] = combined_df['comment_block_text'].apply(
        lambda n: preprocessor.preprocess(str(n)))
    open_source_df['clean_text'] = open_source_df['comment_block_text'].apply(
        lambda n: preprocessor.preprocess(str(n)))
    print('Finish preprocessing')

    # 60% train, 20% validation, 20% test
    x_temp, x_test, y_temp, y_test = train_test_split(
        combined_df['clean_text'],
        combined_df['label'],
        train_size=0.8,
        test_size=0.2,
        random_state=8)
    x_train, x_validation, y_train, y_validation = train_test_split(
        x_temp, y_temp, train_size=0.75, test_size=0.25, random_state=8)

    os.chdir("../../all_files_generated/text_files")
    current_dir = os.getcwd()

    train_text_file_dir = os.path.join(current_dir, 'train.txt')
    validation_text_file_dir = os.path.join(current_dir, 'validation.txt')
    train_validation_text_file_dir = os.path.join(current_dir,
                                                  'train_validation.txt')
    test_text_file_dir = os.path.join(current_dir, 'test.txt')

    print('Creating text files for the fastText model ...')

    print('Start writing to train.txt ...')
    _write_fasttext_file(train_text_file_dir, (x_train, y_train))
    print('Finished writing to train.txt')

    print('Start writing to validation.txt ...')
    _write_fasttext_file(validation_text_file_dir,
                         (x_validation, y_validation))
    print('Finished writing to validation.txt')

    print('Start writing to test.txt ...')
    # Write the held-out TEST split here; the validation split used to be
    # written a second time by mistake.
    _write_fasttext_file(test_text_file_dir, (x_test, y_test))
    print('Finished writing to test.txt')

    print('Start writing to train_validation.txt ...')
    _write_fasttext_file(train_validation_text_file_dir,
                         (x_train, y_train), (x_validation, y_validation))
    print('Finished writing to train_validation.txt')

    print(
        'Finished creating the text files. They will be found in all_files_generated/text_files'
    )

    # Parameter selection for TFIDF (currently unused; kept as a starting
    # point for tuning the vectorizer below)
    ngram_range = (1, 2)
    min_df = 10
    max_df = 1.
    max_features = 300
    category = {'not_license': 0, 'license': 1}

    # using some default parameters here. Change this to whatever you like using the parameters above
    vectorizer = TfidfVectorizer()
    x_train_vectors = vectorizer.fit_transform(x_train)
    x_validation_vectors = vectorizer.transform(x_validation)
    x_test_vectors = vectorizer.transform(x_test)

    os.chdir("../../all_files_generated/data_pickles")
    current_dir = os.getcwd()

    print('Creating pickle files ...')

    pickle_targets = [
        ('x_train.pickle', x_train_vectors),
        ('x_validation.pickle', x_validation_vectors),
        ('x_test.pickle', x_test_vectors),
        ('y_train.pickle', y_train),
        ('y_validation.pickle', y_validation),
        ('y_test.pickle', y_test),
        ('df.pickle', combined_df),
    ]
    for file_name, obj in pickle_targets:
        _dump_pickle(os.path.join(current_dir, file_name), obj)

    print(
        'Finished creation of pickle files. They will be found in all_files_generated/data_pickles.'
    )

Beispiel #14

0

Datei anzeigen


# Seed ``generate_num`` direct messages between random pairs of users,
# creating the conversation record for a pair on first use.
for i in range(generate_num):
    user_one = random.choice(user_list)
    user_two = random.choice(user_list)
    # A conversation between the pair may be stored in either orientation.
    query1 = Q(user_one=user_one) & Q(user_two=user_two)
    query2 = Q(user_one=user_two) & Q(user_two=user_one)
    finalQuery = DirectConversationRecords.objects.filter(query1 | query2)
    if finalQuery.exists():
        direct_convo = finalQuery[0]
    else:
        direct_convo = DirectConversationRecords(user_one=user_one,
                                                 user_two=user_two)
        direct_convo.save()

    mssg = Messages(from_User=user_one,
                    to_User=user_two,
                    direct_conversation_id=direct_convo)
    # randint(0, 9) is never above 9, so the old ``<= 9`` test made the
    # paragraph branch unreachable.  ``< 9`` yields a sentence nine times
    # out of ten.
    # NOTE(review): the intended ratio is an assumption -- confirm.
    if random.randint(0, 9) < 9:
        mssgRand = gen.sentence()
    else:
        mssgRand = gen.paragraph()
    mssg.message = mssgRand
    mssg.save()

Beispiel #15

0

Datei anzeigen

            if word in count.keys():
                count[word] = count[word] + 1
            else:
                count[word] = 1
        return count


# Build a small in-memory corpus of generated paragraphs, index each one with
# VectorCompare.concordance, and answer interactive search queries.
# NOTE(review): raw_input exists only in Python 2.
v = VectorCompare()
gen = DocumentGenerator()
documentDic = {}  # document id -> paragraph text
index = {}  # document id -> concordance of the lower-cased paragraph
searchterm = ""

# test with a small sample size of 1000 documents
for x in range(0, 1000):
    documentDic[x] = gen.paragraph()

for x in range(0, 1000):
    index[x] = v.concordance(documentDic[x].lower())

# search for the entered word until the user types "<exit>"
while (searchterm != "<exit>"):

    searchterm = raw_input('Enter Search Term: ')
    matches = []

    for i in range(len(index)):
        # The query concordance does not depend on i but is rebuilt for
        # every document.
        con = v.concordance(searchterm.lower())
        relation = v.relation(con, index[i])
        # Keep documents with a non-zero relation, paired with the first 100
        # characters of their text.
        if relation != 0:
            matches.append((relation, documentDic[i][:100]))
    # NOTE(review): ``matches`` is never read in the visible code; the
    # snippet is presumably truncated here.

Beispiel #16

0

Datei anzeigen

Datei: fill.py Projekt: lyellread/write365filler

#!/usr/bin/python3

import os, random, time, sys
from essential_generators import DocumentGenerator

import subprocess

# Build the text to type: between 8 and 16 generated paragraphs, concatenated.
gen = DocumentGenerator()
n = random.randint(8, 16)
running = "".join(gen.paragraph() for _ in range(n))

# In verbose mode, broadcast a heads-up to all logged-in terminals.
if len(sys.argv) > 1 and sys.argv[1] in ("-v", "--verbose"):
    os.system(
        '''echo "Starting Write Process, select target in next 20 seconds" | wall'''
    )

print("Chosen n: ", n)

# Give the user time to focus the target window.
time.sleep(20)

# Pass the text as a single argv element instead of splicing it into a shell
# command line: inside double quotes the shell still expands ``$``, backticks
# and backslashes, so generated text could be mangled or executed.  Without a
# shell there is also no need to strip quotes and parentheses any more.
subprocess.run(["xdotool", "type", running])

Beispiel #17

0

Datei anzeigen

Datei: main.py Projekt: NguyenThanhDat741862/POC-spark-structured-streaming

import socket
import time
from essential_generators import DocumentGenerator

# Minimal TCP source: accept one client on 127.0.0.1:9999 and send it one
# generated paragraph per second, forever.
gen = DocumentGenerator()

host = '127.0.0.1'
port = 9999

print("Starting server")
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
    listener.bind((host, port))
    listener.listen()
    # Blocks until the single client connects.
    client, peer = listener.accept()
    with client:
        print('Connected by', peer)
        while True:
            payload = gen.paragraph().encode()
            client.sendall(payload)
            time.sleep(1)